DEPTHWISE_CONV_2D with per-channel quantization.

Variant of the DEPTHWISE_CONV_2D operation that takes:
- TENSOR_QUANT8_ASYMM as input tensor.
- TENSOR_QUANT8_ASYMM as output tensor.
- TENSOR_INT32 as bias tensor.
- TENSOR_QUANT8_SYMM_PER_CHANNEL as filter tensor.

Filter tensor channel dimension has to be 3.

Bias tensor scale value has to be set to 0. Its actual
scaling values depend on the value's position in the tensor:
bias[d].scale = input_tensor.scale * filter.scales[d]
We may want to introduce TENSOR_INT32_PER_CHANNEL for
validation of bias scaling.

Bug: 119255406
Test: Vts/Cts NNAPI tests.
Change-Id: I355dd548794807929d335e9dbe0a59a36419818e
Merged-In: I355dd548794807929d335e9dbe0a59a36419818e
(cherry picked from commit 1ed126134cf8b0d01b41aa52e167fa54acb13849)
diff --git a/common/CpuExecutor.cpp b/common/CpuExecutor.cpp
index 13e375c..19e825a 100644
--- a/common/CpuExecutor.cpp
+++ b/common/CpuExecutor.cpp
@@ -672,6 +672,7 @@
             }
             const RunTimeOperandInfo& input  = mOperands[ins[0]];
             const RunTimeOperandInfo& filter = mOperands[ins[1]];
+            const Operand& filterOperand = mModel->operands[ins[1]];
             const RunTimeOperandInfo& bias   = mOperands[ins[2]];
 
             int32_t padding_left, padding_right;
@@ -756,13 +757,24 @@
                         depth_multiplier, activation,
                         reinterpret_cast<_Float16*>(output_tmp.buffer), outShape);
             } else if (input_tmp.type == OperandType::TENSOR_QUANT8_ASYMM) {
-                success = depthwiseConvQuant8(
-                        reinterpret_cast<const uint8_t*>(input_tmp.buffer), input_tmp.shape(),
-                        reinterpret_cast<const uint8_t*>(filter.buffer), filter.shape(),
-                        reinterpret_cast<const int32_t*>(bias.buffer), bias.shape(), padding_left,
-                        padding_right, padding_top, padding_bottom, stride_width, stride_height,
-                        depth_multiplier, activation, reinterpret_cast<uint8_t*>(output_tmp.buffer),
-                        outShape);
+                if (filter.type == OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL) {
+                    success = depthwiseConvQuant8PerChannel(
+                            reinterpret_cast<const uint8_t*>(input_tmp.buffer), input_tmp.shape(),
+                            reinterpret_cast<const int8_t*>(filter.buffer), filter.shape(),
+                            filterOperand.extraParams.channelQuant().scales.data(),
+                            reinterpret_cast<const int32_t*>(bias.buffer), bias.shape(),
+                            padding_left, padding_right, padding_top, padding_bottom, stride_width,
+                            stride_height, depth_multiplier, activation,
+                            reinterpret_cast<uint8_t*>(output_tmp.buffer), outShape);
+                } else if (filter.type == OperandType::TENSOR_QUANT8_ASYMM) {
+                    success = depthwiseConvQuant8(
+                            reinterpret_cast<const uint8_t*>(input_tmp.buffer), input_tmp.shape(),
+                            reinterpret_cast<const uint8_t*>(filter.buffer), filter.shape(),
+                            reinterpret_cast<const int32_t*>(bias.buffer), bias.shape(),
+                            padding_left, padding_right, padding_top, padding_bottom, stride_width,
+                            stride_height, depth_multiplier, activation,
+                            reinterpret_cast<uint8_t*>(output_tmp.buffer), outShape);
+                }
             }
             if (data_layout) {
                 output_tmp_guard.reset(output_tmp.buffer);
@@ -866,7 +878,6 @@
                             padding_left, padding_right, padding_top, padding_bottom, stride_width,
                             stride_height, activation,
                             reinterpret_cast<uint8_t*>(output_tmp.buffer), outShape);
-
                 } else if (filter.type == OperandType::TENSOR_QUANT8_ASYMM) {
                     success = convQuant8(
                             reinterpret_cast<const uint8_t*>(input_tmp.buffer), input_tmp.shape(),
diff --git a/common/OperationsUtils.cpp b/common/OperationsUtils.cpp
index b0399f0..72edd2a 100644
--- a/common/OperationsUtils.cpp
+++ b/common/OperationsUtils.cpp
@@ -367,7 +367,11 @@
                           int32_t padding_top, int32_t padding_bottom,
                           int32_t stride_width, int32_t stride_height,
                           Shape* output) {
-    NN_OPS_CHECK(input.type == filter.type);
+    if (filter.type == OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL) {
+        NN_OPS_CHECK(input.type == OperandType::TENSOR_QUANT8_ASYMM);
+    } else {
+        NN_OPS_CHECK(input.type == filter.type);
+    }
     if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
         NN_OPS_CHECK(bias.type == OperandType::TENSOR_INT32);
     } else {
diff --git a/common/Utils.cpp b/common/Utils.cpp
index 336086e..a643306 100644
--- a/common/Utils.cpp
+++ b/common/Utils.cpp
@@ -763,6 +763,7 @@
                 return ANEURALNETWORKS_BAD_DATA;
             }
             auto inputType = operands[inputIndexes[0]].type;
+            auto filterType = operands[inputIndexes[1]].type;
             std::vector<OperandType> inExpectedTypes;
             std::vector<OperandType> outExpectedTypes;
             if (inputType == OperandType::TENSOR_FLOAT32) {
@@ -784,10 +785,19 @@
                 };
                 outExpectedTypes = {OperandType::TENSOR_FLOAT16};
             } else if (inputType == OperandType::TENSOR_QUANT8_ASYMM) {
-                NN_RETURN_IF_ERROR(validateHalVersion(opType, halVersion, HalVersion::V1_0));
+                if (filterType == OperandType::TENSOR_QUANT8_ASYMM) {
+                    NN_RETURN_IF_ERROR(validateHalVersion(opType, halVersion, HalVersion::V1_0));
+                } else if (filterType == OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL) {
+                    NN_RETURN_IF_ERROR(validateHalVersion(opType, halVersion, HalVersion::V1_2));
+                } else {
+                    LOG(ERROR) << "Unsupported filter tensor type for operation "
+                               << kOperationNames[opType];
+                    return ANEURALNETWORKS_BAD_DATA;
+                }
+
                 inExpectedTypes = {
                         OperandType::TENSOR_QUANT8_ASYMM,
-                        OperandType::TENSOR_QUANT8_ASYMM,
+                        filterType,
                         OperandType::TENSOR_INT32,
                         OperandType::INT32,
                         OperandType::INT32,
diff --git a/common/include/Operations.h b/common/include/Operations.h
index 76b9626..b5df028 100644
--- a/common/include/Operations.h
+++ b/common/include/Operations.h
@@ -83,6 +83,14 @@
                          int32_t paddingRight, int32_t paddingTop, int32_t paddingBottom,
                          int32_t strideWidth, int32_t strideHeight, int32_t depthMultiplier,
                          int32_t activation, uint8_t* outputData, const Shape& outputShape);
+bool depthwiseConvQuant8PerChannel(const uint8_t* inputData, const Shape& inputShape,
+                                   const int8_t* filterData, const Shape& filterShape,
+                                   const float* filterScales, const int32_t* biasData,
+                                   const Shape& biasShape, int32_t paddingLeft,
+                                   int32_t paddingRight, int32_t paddingTop, int32_t paddingBottom,
+                                   int32_t strideWidth, int32_t strideHeight,
+                                   int32_t depthMultiplier, int32_t activation, uint8_t* outputData,
+                                   const Shape& outputShape);
 
 bool convFloat16(const _Float16* inputData, const Shape& inputShape, const _Float16* filterData,
                  const Shape& filterShape, const _Float16* biasData, const Shape& biasShape,
diff --git a/common/operations/DepthwiseConv2D.cpp b/common/operations/DepthwiseConv2D.cpp
index 9728cf1..b0ed79b 100644
--- a/common/operations/DepthwiseConv2D.cpp
+++ b/common/operations/DepthwiseConv2D.cpp
@@ -125,6 +125,123 @@
     return true;
 }
 
+// Reference implementation of DEPTHWISE_CONV_2D for a TENSOR_QUANT8_ASYMM input/output and a
+// TENSOR_QUANT8_SYMM_PER_CHANNEL filter.
+//
+// inputData, filterData and outputData are indexed as NHWC buffers. filterScales holds one
+// scale per output channel and must provide at least outputDepth entries; the bias scale of
+// channel d is taken to be inputShape.scale * filterScales[d], overriding biasShape.scale.
+// paddingRight and paddingBottom are unused: only the top/left padding is needed to locate
+// the input window of an output position.
+//
+// Returns false if depthMultiplier is inconsistent with the input/output depths or if the
+// requantization multiplier of any output channel cannot be computed, true otherwise.
+bool depthwiseConvQuant8PerChannel(const uint8_t* inputData, const Shape& inputShape,
+                                   const int8_t* filterData, const Shape& filterShape,
+                                   const float* filterScales, const int32_t* biasData,
+                                   const Shape& biasShape, int32_t paddingLeft,
+                                   int32_t paddingRight, int32_t paddingTop, int32_t paddingBottom,
+                                   int32_t strideWidth, int32_t strideHeight,
+                                   int32_t depthMultiplier, int32_t activation, uint8_t* outputData,
+                                   const Shape& outputShape) {
+    NNTRACE_TRANS("depthwiseConvQuant8PerChannel");
+
+    const uint32_t numBatches = getSizeOfDimension(inputShape, 0);
+    const uint32_t inputHeight = getSizeOfDimension(inputShape, 1);
+    const uint32_t inputWidth = getSizeOfDimension(inputShape, 2);
+    const uint32_t inputDepth = getSizeOfDimension(inputShape, 3);
+    const uint32_t filterHeight = getSizeOfDimension(filterShape, 1);
+    const uint32_t filterWidth = getSizeOfDimension(filterShape, 2);
+    const uint32_t filterDepth = getSizeOfDimension(filterShape, 3);
+    const uint32_t outputHeight = getSizeOfDimension(outputShape, 1);
+    const uint32_t outputWidth = getSizeOfDimension(outputShape, 2);
+    const uint32_t outputDepth = getSizeOfDimension(outputShape, 3);
+
+    // The per-channel tables below are sized by outputDepth and indexed by
+    // ic * depthMultiplier + m, so reject shapes for which that index could run past them.
+    if (depthMultiplier <= 0 ||
+        outputDepth != inputDepth * static_cast<uint32_t>(depthMultiplier)) {
+        return false;
+    }
+    const uint32_t channelMultiplier = static_cast<uint32_t>(depthMultiplier);
+
+    const int32_t inputOffset = -inputShape.offset;
+    const int32_t outputOffset = outputShape.offset;
+
+    // Each output channel has its own filter scale, hence its own requantization parameters.
+    std::vector<int32_t> outputMultiplier(outputDepth, 0);
+    std::vector<int32_t> outputShift(outputDepth, 0);
+
+    for (uint32_t oc = 0; oc < outputDepth; ++oc) {
+        Shape filterChannelShape = filterShape;
+        filterChannelShape.scale = filterScales[oc];
+        Shape biasChannelShape = biasShape;
+        biasChannelShape.scale = filterScales[oc] * inputShape.scale;
+
+        float realMultiplier = 0.0f;
+        if (!GetQuantizedConvolutionMultipler(inputShape, filterChannelShape, biasChannelShape,
+                                              outputShape, &realMultiplier) ||
+            !QuantizeMultiplierSmallerThanOne(realMultiplier, &outputMultiplier[oc],
+                                              &outputShift[oc])) {
+            return false;
+        }
+    }
+
+    int32_t output_activation_min = 0, output_activation_max = 0;
+    CalculateActivationRangeUint8(activation, outputShape, &output_activation_min,
+                                  &output_activation_max);
+
+    const uint8_t* inputBase = inputData;
+    uint8_t* outPtr = outputData;
+    for (uint32_t b = 0; b < numBatches; b++) {
+        for (uint32_t h = 0; h < outputHeight; h++) {
+            // Top-left corner of the input window; it does not depend on the channel.
+            const int32_t hInputOrigin = static_cast<int32_t>(h) * strideHeight - paddingTop;
+            for (uint32_t w = 0; w < outputWidth; w++) {
+                const int32_t wInputOrigin = static_cast<int32_t>(w) * strideWidth - paddingLeft;
+                for (uint32_t ic = 0; ic < inputDepth; ic++) {
+                    for (uint32_t m = 0; m < channelMultiplier; m++) {
+                        const uint32_t oc = m + ic * channelMultiplier;
+
+                        int32_t sum = 0;
+                        for (uint32_t i = 0; i < filterHeight; i++) {
+                            for (uint32_t j = 0; j < filterWidth; j++) {
+                                const int32_t hInput = hInputOrigin + static_cast<int32_t>(i);
+                                const int32_t wInput = wInputOrigin + static_cast<int32_t>(j);
+
+                                // Taps that fall outside the input (padding) are skipped.
+                                if (hInput >= 0 && hInput < static_cast<int32_t>(inputHeight) &&
+                                    wInput >= 0 && wInput < static_cast<int32_t>(inputWidth)) {
+                                    const uint32_t filterIndex =
+                                            i * filterWidth * filterDepth + j * filterDepth + oc;
+                                    const uint32_t inputIndex = hInput * inputWidth * inputDepth +
+                                                                wInput * inputDepth + ic;
+                                    // Only the input has an offset applied; the filter value
+                                    // is used as stored.
+                                    sum += (static_cast<int32_t>(filterData[filterIndex])) *
+                                           (static_cast<int32_t>(inputBase[inputIndex]) +
+                                            inputOffset);
+                                }
+                            }
+                        }
+
+                        sum += biasData[oc];
+                        sum = tflite::MultiplyByQuantizedMultiplier(sum, outputMultiplier[oc],
+                                                                    -outputShift[oc]);
+                        sum += outputOffset;
+                        sum = std::max(std::min(sum, output_activation_max), output_activation_min);
+                        outPtr[m] = static_cast<uint8_t>(sum);
+                    }
+                    outPtr += channelMultiplier;
+                }
+            }
+        }
+        inputBase += inputHeight * inputWidth * inputDepth;
+    }
+
+    return true;
+}
+
 #undef ANDROID_NN_DEPTHWISE_CONV_PARAMETERS
 }  // namespace nn
 }  // namespace android
diff --git a/runtime/include/NeuralNetworks.h b/runtime/include/NeuralNetworks.h
index 82daee9..5e8dbb6 100644
--- a/runtime/include/NeuralNetworks.h
+++ b/runtime/include/NeuralNetworks.h
@@ -462,9 +462,21 @@
      *             filter[1, di, dj, k * channel_multiplier + q]
      *         ) + bias[k * channel_multiplier + q]
      *
-     * Supported tensor {@link OperandCode}:
-     * * {@link ANEURALNETWORKS_TENSOR_FLOAT32}
-     * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}
+     * Supported tensor {@link OperandCode} configurations:
+     * * 32 bit Floating point :
+     * * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} for input, filter, output, and bias.
+     *
+     * * Quantized:
+     * * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} for input, filter, and output.
+     * * * {@link ANEURALNETWORKS_TENSOR_INT32} for bias (with scale set to
+     * * * input.scale * filter.scale).
+     *
+     * Available since API level 29:
+     * * Quantized with symmetric per channel quantization for the filter:
+     * * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} for input, and output.
+     * * * {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL} for filter.
+     * * * {@link ANEURALNETWORKS_TENSOR_INT32} for bias (scale set to 0.0,
+     * * * each value scaling is separate and equal to input.scale * filter.scales[channel]).
      *
      * Supported tensor rank: 4, with "NHWC" or "NCHW" data layout.
      * With the default data layout NHWC, the data is stored in the order of:
@@ -477,13 +489,19 @@
      * * 0: A 4-D tensor, of shape [batches, height, width, depth_in],
      *      specifying the input.
      * * 1: A 4-D tensor, of shape [1, filter_height, filter_width, depth_out],
-     *      specifying the filter.
+     *      specifying the filter. For tensor of type
+     *      {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL} the channel
+     *      dimension (extraParams.channelQuant.channelDim) must be set to 3.
      * * 2: A 1-D tensor, of shape [depth_out], specifying the bias. For input
-     *      tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32}, the bias should
-     *      also be of {@link ANEURALNETWORKS_TENSOR_FLOAT32}. For input tensor
-     *      of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}, the bias should be
-     *      of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint of 0 and
-     *      bias_scale == input_scale * filter_scale.
+     *      tensor of type {@link ANEURALNETWORKS_TENSOR_FLOAT32} or
+     *      {@link ANEURALNETWORKS_TENSOR_FLOAT16}, the bias must be of the same
+     *      type. For filter tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM},
+     *      the bias should be of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint
+     *      of 0 and bias_scale == input_scale * filter_scale. For filter tensor
+     *      of {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL}, the bias
+     *      should be of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint of
+     *      0 and bias_scale of 0. The actual scale of each value 'i' is equal to
+     *      bias_scale[i] = input_scale * filter_scale[i].
      * * 3: An {@link ANEURALNETWORKS_INT32} scalar, specifying the padding on
      *      the left, in the ‘width’ dimension.
      * * 4: An {@link ANEURALNETWORKS_INT32} scalar, specifying the padding on
@@ -511,11 +529,15 @@
      * * 1: A 4-D tensor, of shape [1, filter_height, filter_width, depth_out],
      *      specifying the filter.
      * * 2: A 1-D tensor, of shape [depth_out], specifying the bias. For input
-     *      tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32}, the bias should
-     *      also be of {@link ANEURALNETWORKS_TENSOR_FLOAT32}. For input tensor
-     *      of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}, the bias should be
-     *      of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint of 0 and
-     *      bias_scale == input_scale * filter_scale.
+     *      tensor of type {@link ANEURALNETWORKS_TENSOR_FLOAT32} or
+     *      {@link ANEURALNETWORKS_TENSOR_FLOAT16}, the bias must be of the same
+     *      type. For filter tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM},
+     *      the bias should be of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint
+     *      of 0 and bias_scale == input_scale * filter_scale. For filter tensor
+     *      of {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL}, the bias
+     *      should be of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint of
+     *      0 and bias_scale of 0. The actual scale of each value 'i' is equal to
+     *      bias_scale[i] = input_scale * filter_scale[i].
      * * 3: An {@link ANEURALNETWORKS_INT32} scalar, specifying the implicit
      *      padding scheme, has to be one of the
      *      {@link PaddingCode} values.
@@ -536,7 +558,9 @@
      * * 0: The output 4-D tensor, of shape
      *      [batches, out_height, out_width, depth_out]. For output tensor of
      *      {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}, the following condition
-     *      must be satisfied: output_scale > input_scale * filter_scale.
+     *      must be satisfied: output_scale > input_scale * filter_scale (for
+     *      filter tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL}
+     *      this condition must be true for all filter scales).
      *
      * Available since API level 27.
      */
diff --git a/runtime/test/TestValidateOperations.cpp b/runtime/test/TestValidateOperations.cpp
index 89d096f..3968358 100644
--- a/runtime/test/TestValidateOperations.cpp
+++ b/runtime/test/TestValidateOperations.cpp
@@ -917,30 +917,46 @@
     convOpTest(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL);
 }
 
-void depthwiseConvOpTest(int32_t operandCode) {
+void depthwiseConvOpTest(int32_t inputOperandCode, int32_t filterOperandCode) {
     uint32_t inputDimensions[4] = {1, 2, 2, 2};
-    ANeuralNetworksOperandType input = {.type = operandCode,
+    ANeuralNetworksOperandType input = {.type = inputOperandCode,
                                         .dimensionCount = 4,
                                         .dimensions = inputDimensions,
                                         .scale = 0.0f,
                                         .zeroPoint = 0};
-    if (operandCode == ANEURALNETWORKS_TENSOR_QUANT8_ASYMM) {
+    if (inputOperandCode == ANEURALNETWORKS_TENSOR_QUANT8_ASYMM) {
         input.scale = 0.5f;
     }
-
-    ANeuralNetworksOperandType filter = input;
     ANeuralNetworksOperandType output = input;
 
+    float filterScales[2] = {0.5f, 1.0f};
+    ANeuralNetworksOperandType filter = {.type = filterOperandCode,
+                                         .dimensionCount = 4,
+                                         .dimensions = inputDimensions,
+                                         .scale = 0.0f,
+                                         .zeroPoint = 0};
+    if (filterOperandCode == ANEURALNETWORKS_TENSOR_QUANT8_ASYMM) {
+        filter.scale = 0.5f;
+    }
+    if (filterOperandCode == ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL) {
+        filter.extraParams.channelQuant = {
+                .scales = filterScales, .channelDim = 3, .scaleCount = 2};
+    }
+
     uint32_t biasDimensions[1] = {2};
-    ANeuralNetworksOperandType bias = {.type = operandCode,
+    ANeuralNetworksOperandType bias = {.type = inputOperandCode,
                                        .dimensionCount = 1,
                                        .dimensions = biasDimensions,
                                        .scale = 0.0f,
                                        .zeroPoint = 0};
-    if (operandCode == ANEURALNETWORKS_TENSOR_QUANT8_ASYMM) {
+    if (filterOperandCode == ANEURALNETWORKS_TENSOR_QUANT8_ASYMM) {
         bias.type = ANEURALNETWORKS_TENSOR_INT32;
         bias.scale = 0.25f;
     }
+    if (filterOperandCode == ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL) {
+        bias.type = ANEURALNETWORKS_TENSOR_INT32;
+        bias.scale = 0.0f;
+    }
 
     ANeuralNetworksOperandType scalar = {.type = ANEURALNETWORKS_INT32,
                                          .dimensionCount = 0,
@@ -1010,15 +1026,20 @@
 }
 
 TEST(OperationValidationTest, DEPTHWISE_CONV_2D_float32) {
-    depthwiseConvOpTest(ANEURALNETWORKS_TENSOR_FLOAT32);
+    depthwiseConvOpTest(ANEURALNETWORKS_TENSOR_FLOAT32, ANEURALNETWORKS_TENSOR_FLOAT32);
 }
 
 TEST(OperationValidationTest, DEPTHWISE_CONV_2D_float16) {
-    depthwiseConvOpTest(ANEURALNETWORKS_TENSOR_FLOAT16);
+    depthwiseConvOpTest(ANEURALNETWORKS_TENSOR_FLOAT16, ANEURALNETWORKS_TENSOR_FLOAT16);
 }
 
 TEST(OperationValidationTest, DEPTHWISE_CONV_2D_quant8) {
-    depthwiseConvOpTest(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM);
+    depthwiseConvOpTest(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM, ANEURALNETWORKS_TENSOR_QUANT8_ASYMM);
+}
+
+TEST(OperationValidationTest, DEPTHWISE_CONV_2D_quant8_per_channel) {
+    depthwiseConvOpTest(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM,
+                        ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL);
 }
 
 void fullyConnectedOpTest(int32_t operandCode) {
diff --git a/runtime/test/for-cts/TestGeneratedOneFile.cpp b/runtime/test/for-cts/TestGeneratedOneFile.cpp
index 27deca1..9211e35 100644
--- a/runtime/test/for-cts/TestGeneratedOneFile.cpp
+++ b/runtime/test/for-cts/TestGeneratedOneFile.cpp
@@ -346,6 +346,7 @@
 #include "../generated/tests/conv2d_v1_2.mod.py.cpp"
 #include "../generated/tests/conv_per_channel.mod.py.cpp"
 #include "../generated/tests/depth_to_space_v1_2.mod.py.cpp"
+#include "../generated/tests/depthwise_conv2d_per_channel.mod.py.cpp"
 #include "../generated/tests/depthwise_conv2d_v1_2.mod.py.cpp"
 #include "../generated/tests/dequantize_float16.mod.py.cpp"
 #include "../generated/tests/div_broadcast_float16.mod.py.cpp"
diff --git a/runtime/test/generated/all_generated_V1_2_vts_tests.cpp b/runtime/test/generated/all_generated_V1_2_vts_tests.cpp
index c194a70..6702208 100644
--- a/runtime/test/generated/all_generated_V1_2_vts_tests.cpp
+++ b/runtime/test/generated/all_generated_V1_2_vts_tests.cpp
@@ -5059,6 +5059,134 @@
 }
 
 
+// Generated from: depthwise_conv2d_per_channel.mod.py.
+namespace depthwise_conv2d_per_channel {
+// Generated depthwise_conv2d_per_channel test
+#include "examples/depthwise_conv2d_per_channel.example.cpp"
+// Generated model constructor
+#include "vts_models/depthwise_conv2d_per_channel.model.cpp"
+} // namespace depthwise_conv2d_per_channel
+
+TEST_F(NeuralnetworksHidlTest, depthwise_conv2d_per_channel_same) {
+  generated_tests::Execute(device,
+                           depthwise_conv2d_per_channel::createTestModel_same,
+                           depthwise_conv2d_per_channel::is_ignored_same,
+                           depthwise_conv2d_per_channel::get_examples_same());
+}
+
+TEST_F(ValidationTest, depthwise_conv2d_per_channel_same) {
+  const Model model = depthwise_conv2d_per_channel::createTestModel_same();
+  const std::vector<Request> requests = createRequests(depthwise_conv2d_per_channel::get_examples_same());
+  validateModel(model);
+  validateRequests(model, requests);
+}
+
+
+TEST_F(NeuralnetworksHidlTest, depthwise_conv2d_per_channel_same_weight_as_input) {
+  generated_tests::Execute(device,
+                           depthwise_conv2d_per_channel::createTestModel_same_weight_as_input,
+                           depthwise_conv2d_per_channel::is_ignored_same_weight_as_input,
+                           depthwise_conv2d_per_channel::get_examples_same_weight_as_input());
+}
+
+TEST_F(ValidationTest, depthwise_conv2d_per_channel_same_weight_as_input) {
+  const Model model = depthwise_conv2d_per_channel::createTestModel_same_weight_as_input();
+  const std::vector<Request> requests = createRequests(depthwise_conv2d_per_channel::get_examples_same_weight_as_input());
+  validateModel(model);
+  validateRequests(model, requests);
+}
+
+
+TEST_F(NeuralnetworksHidlTest, depthwise_conv2d_per_channel_different) {
+  generated_tests::Execute(device,
+                           depthwise_conv2d_per_channel::createTestModel_different,
+                           depthwise_conv2d_per_channel::is_ignored_different,
+                           depthwise_conv2d_per_channel::get_examples_different());
+}
+
+TEST_F(ValidationTest, depthwise_conv2d_per_channel_different) {
+  const Model model = depthwise_conv2d_per_channel::createTestModel_different();
+  const std::vector<Request> requests = createRequests(depthwise_conv2d_per_channel::get_examples_different());
+  validateModel(model);
+  validateRequests(model, requests);
+}
+
+
+TEST_F(NeuralnetworksHidlTest, depthwise_conv2d_per_channel_different_weight_as_input) {
+  generated_tests::Execute(device,
+                           depthwise_conv2d_per_channel::createTestModel_different_weight_as_input,
+                           depthwise_conv2d_per_channel::is_ignored_different_weight_as_input,
+                           depthwise_conv2d_per_channel::get_examples_different_weight_as_input());
+}
+
+TEST_F(ValidationTest, depthwise_conv2d_per_channel_different_weight_as_input) {
+  const Model model = depthwise_conv2d_per_channel::createTestModel_different_weight_as_input();
+  const std::vector<Request> requests = createRequests(depthwise_conv2d_per_channel::get_examples_different_weight_as_input());
+  validateModel(model);
+  validateRequests(model, requests);
+}
+
+
+TEST_F(NeuralnetworksHidlTest, depthwise_conv2d_per_channel_layout_nhwc) {
+  generated_tests::Execute(device,
+                           depthwise_conv2d_per_channel::createTestModel_layout_nhwc,
+                           depthwise_conv2d_per_channel::is_ignored_layout_nhwc,
+                           depthwise_conv2d_per_channel::get_examples_layout_nhwc());
+}
+
+TEST_F(ValidationTest, depthwise_conv2d_per_channel_layout_nhwc) {
+  const Model model = depthwise_conv2d_per_channel::createTestModel_layout_nhwc();
+  const std::vector<Request> requests = createRequests(depthwise_conv2d_per_channel::get_examples_layout_nhwc());
+  validateModel(model);
+  validateRequests(model, requests);
+}
+
+
+TEST_F(NeuralnetworksHidlTest, depthwise_conv2d_per_channel_layout_nhwc_weight_as_input) {
+  generated_tests::Execute(device,
+                           depthwise_conv2d_per_channel::createTestModel_layout_nhwc_weight_as_input,
+                           depthwise_conv2d_per_channel::is_ignored_layout_nhwc_weight_as_input,
+                           depthwise_conv2d_per_channel::get_examples_layout_nhwc_weight_as_input());
+}
+
+TEST_F(ValidationTest, depthwise_conv2d_per_channel_layout_nhwc_weight_as_input) {
+  const Model model = depthwise_conv2d_per_channel::createTestModel_layout_nhwc_weight_as_input();
+  const std::vector<Request> requests = createRequests(depthwise_conv2d_per_channel::get_examples_layout_nhwc_weight_as_input());
+  validateModel(model);
+  validateRequests(model, requests);
+}
+
+
+TEST_F(NeuralnetworksHidlTest, depthwise_conv2d_per_channel_layout_nchw) {
+  generated_tests::Execute(device,
+                           depthwise_conv2d_per_channel::createTestModel_layout_nchw,
+                           depthwise_conv2d_per_channel::is_ignored_layout_nchw,
+                           depthwise_conv2d_per_channel::get_examples_layout_nchw());
+}
+
+TEST_F(ValidationTest, depthwise_conv2d_per_channel_layout_nchw) {
+  const Model model = depthwise_conv2d_per_channel::createTestModel_layout_nchw();
+  const std::vector<Request> requests = createRequests(depthwise_conv2d_per_channel::get_examples_layout_nchw());
+  validateModel(model);
+  validateRequests(model, requests);
+}
+
+
+TEST_F(NeuralnetworksHidlTest, depthwise_conv2d_per_channel_layout_nchw_weight_as_input) {
+  generated_tests::Execute(device,
+                           depthwise_conv2d_per_channel::createTestModel_layout_nchw_weight_as_input,
+                           depthwise_conv2d_per_channel::is_ignored_layout_nchw_weight_as_input,
+                           depthwise_conv2d_per_channel::get_examples_layout_nchw_weight_as_input());
+}
+
+TEST_F(ValidationTest, depthwise_conv2d_per_channel_layout_nchw_weight_as_input) {
+  const Model model = depthwise_conv2d_per_channel::createTestModel_layout_nchw_weight_as_input();
+  const std::vector<Request> requests = createRequests(depthwise_conv2d_per_channel::get_examples_layout_nchw_weight_as_input());
+  validateModel(model);
+  validateRequests(model, requests);
+}
+
+
 // Generated from: depthwise_conv2d_v1_2.mod.py.
 namespace depthwise_conv2d_v1_2 {
 // Generated depthwise_conv2d_v1_2 test
@@ -6096,6 +6224,194 @@
 }
 
 
+// Generated from: embedding_lookup_sparse.mod.py. Wraps the generated sources below in one namespace.
+namespace embedding_lookup_sparse {
+// Generated examples, textually included here (the tests below call get_examples_* from this namespace)
+#include "examples/embedding_lookup_sparse.example.cpp"
+// Generated model constructors, textually included here (presumably the source of createTestModel*/is_ignored*)
+#include "vts_models/embedding_lookup_sparse.model.cpp"
+} // namespace embedding_lookup_sparse
+
+TEST_F(NeuralnetworksHidlTest, embedding_lookup_sparse_simple_sum) {
+  // Fetch the example list up front, then hand builder, ignore predicate and examples to Execute.
+  auto&& examples = embedding_lookup_sparse::get_examples_simple_sum();
+  generated_tests::Execute(device, embedding_lookup_sparse::createTestModel,
+                           embedding_lookup_sparse::is_ignored, examples);
+}
+
+TEST_F(ValidationTest, embedding_lookup_sparse_simple_sum) {  // validate the generated model, then its requests
+  const Model generatedModel = embedding_lookup_sparse::createTestModel();
+  const std::vector<Request> exampleRequests = createRequests(embedding_lookup_sparse::get_examples_simple_sum());
+  validateModel(generatedModel);
+  validateRequests(generatedModel, exampleRequests);
+}
+
+
+TEST_F(NeuralnetworksHidlTest, embedding_lookup_sparse_simple_sum_relaxed) {
+  // Fetch the example list up front, then hand builder, ignore predicate and examples to Execute.
+  auto&& examples = embedding_lookup_sparse::get_examples_simple_sum_relaxed();
+  generated_tests::Execute(device, embedding_lookup_sparse::createTestModel_relaxed,
+                           embedding_lookup_sparse::is_ignored_relaxed, examples);
+}
+
+TEST_F(ValidationTest, embedding_lookup_sparse_simple_sum_relaxed) {  // validate the generated model, then its requests
+  const Model generatedModel = embedding_lookup_sparse::createTestModel_relaxed();
+  const std::vector<Request> exampleRequests = createRequests(embedding_lookup_sparse::get_examples_simple_sum_relaxed());
+  validateModel(generatedModel);
+  validateRequests(generatedModel, exampleRequests);
+}
+
+
+TEST_F(NeuralnetworksHidlTest, embedding_lookup_sparse_simple_sum_float16) {
+  // Fetch the example list up front, then hand builder, ignore predicate and examples to Execute.
+  auto&& examples = embedding_lookup_sparse::get_examples_simple_sum_float16();
+  generated_tests::Execute(device, embedding_lookup_sparse::createTestModel_float16,
+                           embedding_lookup_sparse::is_ignored_float16, examples);
+}
+
+TEST_F(ValidationTest, embedding_lookup_sparse_simple_sum_float16) {  // validate the generated model, then its requests
+  const Model generatedModel = embedding_lookup_sparse::createTestModel_float16();
+  const std::vector<Request> exampleRequests = createRequests(embedding_lookup_sparse::get_examples_simple_sum_float16());
+  validateModel(generatedModel);
+  validateRequests(generatedModel, exampleRequests);
+}
+
+
+TEST_F(NeuralnetworksHidlTest, embedding_lookup_sparse_simple_mean) {
+  // Fetch the example list up front, then hand builder, ignore predicate and examples to Execute.
+  auto&& examples = embedding_lookup_sparse::get_examples_simple_mean();
+  generated_tests::Execute(device, embedding_lookup_sparse::createTestModel_2,
+                           embedding_lookup_sparse::is_ignored_2, examples);
+}
+
+TEST_F(ValidationTest, embedding_lookup_sparse_simple_mean) {  // validate the generated model, then its requests
+  const Model generatedModel = embedding_lookup_sparse::createTestModel_2();
+  const std::vector<Request> exampleRequests = createRequests(embedding_lookup_sparse::get_examples_simple_mean());
+  validateModel(generatedModel);
+  validateRequests(generatedModel, exampleRequests);
+}
+
+
+TEST_F(NeuralnetworksHidlTest, embedding_lookup_sparse_simple_mean_relaxed) {
+  // Fetch the example list up front, then hand builder, ignore predicate and examples to Execute.
+  auto&& examples = embedding_lookup_sparse::get_examples_simple_mean_relaxed();
+  generated_tests::Execute(device, embedding_lookup_sparse::createTestModel_relaxed_2,
+                           embedding_lookup_sparse::is_ignored_relaxed_2, examples);
+}
+
+TEST_F(ValidationTest, embedding_lookup_sparse_simple_mean_relaxed) {  // validate the generated model, then its requests
+  const Model generatedModel = embedding_lookup_sparse::createTestModel_relaxed_2();
+  const std::vector<Request> exampleRequests = createRequests(embedding_lookup_sparse::get_examples_simple_mean_relaxed());
+  validateModel(generatedModel);
+  validateRequests(generatedModel, exampleRequests);
+}
+
+
+TEST_F(NeuralnetworksHidlTest, embedding_lookup_sparse_simple_mean_float16) {
+  // Fetch the example list up front, then hand builder, ignore predicate and examples to Execute.
+  auto&& examples = embedding_lookup_sparse::get_examples_simple_mean_float16();
+  generated_tests::Execute(device, embedding_lookup_sparse::createTestModel_float16_2,
+                           embedding_lookup_sparse::is_ignored_float16_2, examples);
+}
+
+TEST_F(ValidationTest, embedding_lookup_sparse_simple_mean_float16) {  // validate the generated model, then its requests
+  const Model generatedModel = embedding_lookup_sparse::createTestModel_float16_2();
+  const std::vector<Request> exampleRequests = createRequests(embedding_lookup_sparse::get_examples_simple_mean_float16());
+  validateModel(generatedModel);
+  validateRequests(generatedModel, exampleRequests);
+}
+
+
+TEST_F(NeuralnetworksHidlTest, embedding_lookup_sparse_simple_sqrtn) {
+  // Fetch the example list up front, then hand builder, ignore predicate and examples to Execute.
+  auto&& examples = embedding_lookup_sparse::get_examples_simple_sqrtn();
+  generated_tests::Execute(device, embedding_lookup_sparse::createTestModel_3,
+                           embedding_lookup_sparse::is_ignored_3, examples);
+}
+
+TEST_F(ValidationTest, embedding_lookup_sparse_simple_sqrtn) {  // validate the generated model, then its requests
+  const Model generatedModel = embedding_lookup_sparse::createTestModel_3();
+  const std::vector<Request> exampleRequests = createRequests(embedding_lookup_sparse::get_examples_simple_sqrtn());
+  validateModel(generatedModel);
+  validateRequests(generatedModel, exampleRequests);
+}
+
+
+TEST_F(NeuralnetworksHidlTest, embedding_lookup_sparse_simple_sqrtn_relaxed) {
+  // Fetch the example list up front, then hand builder, ignore predicate and examples to Execute.
+  auto&& examples = embedding_lookup_sparse::get_examples_simple_sqrtn_relaxed();
+  generated_tests::Execute(device, embedding_lookup_sparse::createTestModel_relaxed_3,
+                           embedding_lookup_sparse::is_ignored_relaxed_3, examples);
+}
+
+TEST_F(ValidationTest, embedding_lookup_sparse_simple_sqrtn_relaxed) {  // validate the generated model, then its requests
+  const Model generatedModel = embedding_lookup_sparse::createTestModel_relaxed_3();
+  const std::vector<Request> exampleRequests = createRequests(embedding_lookup_sparse::get_examples_simple_sqrtn_relaxed());
+  validateModel(generatedModel);
+  validateRequests(generatedModel, exampleRequests);
+}
+
+
+TEST_F(NeuralnetworksHidlTest, embedding_lookup_sparse_simple_sqrtn_float16) {
+  // Fetch the example list up front, then hand builder, ignore predicate and examples to Execute.
+  auto&& examples = embedding_lookup_sparse::get_examples_simple_sqrtn_float16();
+  generated_tests::Execute(device, embedding_lookup_sparse::createTestModel_float16_3,
+                           embedding_lookup_sparse::is_ignored_float16_3, examples);
+}
+
+TEST_F(ValidationTest, embedding_lookup_sparse_simple_sqrtn_float16) {  // validate the generated model, then its requests
+  const Model generatedModel = embedding_lookup_sparse::createTestModel_float16_3();
+  const std::vector<Request> exampleRequests = createRequests(embedding_lookup_sparse::get_examples_simple_sqrtn_float16());
+  validateModel(generatedModel);
+  validateRequests(generatedModel, exampleRequests);
+}
+
+
+TEST_F(NeuralnetworksHidlTest, embedding_lookup_sparse_3d_indices) {
+  // Fetch the example list up front, then hand builder, ignore predicate and examples to Execute.
+  auto&& examples = embedding_lookup_sparse::get_examples_3d_indices();
+  generated_tests::Execute(device, embedding_lookup_sparse::createTestModel_4,
+                           embedding_lookup_sparse::is_ignored_4, examples);
+}
+
+TEST_F(ValidationTest, embedding_lookup_sparse_3d_indices) {  // validate the generated model, then its requests
+  const Model generatedModel = embedding_lookup_sparse::createTestModel_4();
+  const std::vector<Request> exampleRequests = createRequests(embedding_lookup_sparse::get_examples_3d_indices());
+  validateModel(generatedModel);
+  validateRequests(generatedModel, exampleRequests);
+}
+
+
+TEST_F(NeuralnetworksHidlTest, embedding_lookup_sparse_3d_indices_relaxed) {
+  // Fetch the example list up front, then hand builder, ignore predicate and examples to Execute.
+  auto&& examples = embedding_lookup_sparse::get_examples_3d_indices_relaxed();
+  generated_tests::Execute(device, embedding_lookup_sparse::createTestModel_relaxed_4,
+                           embedding_lookup_sparse::is_ignored_relaxed_4, examples);
+}
+
+TEST_F(ValidationTest, embedding_lookup_sparse_3d_indices_relaxed) {  // validate the generated model, then its requests
+  const Model generatedModel = embedding_lookup_sparse::createTestModel_relaxed_4();
+  const std::vector<Request> exampleRequests = createRequests(embedding_lookup_sparse::get_examples_3d_indices_relaxed());
+  validateModel(generatedModel);
+  validateRequests(generatedModel, exampleRequests);
+}
+
+
+TEST_F(NeuralnetworksHidlTest, embedding_lookup_sparse_3d_indices_float16) {
+  // Fetch the example list up front, then hand builder, ignore predicate and examples to Execute.
+  auto&& examples = embedding_lookup_sparse::get_examples_3d_indices_float16();
+  generated_tests::Execute(device, embedding_lookup_sparse::createTestModel_float16_4,
+                           embedding_lookup_sparse::is_ignored_float16_4, examples);
+}
+
+TEST_F(ValidationTest, embedding_lookup_sparse_3d_indices_float16) {  // validate the generated model, then its requests
+  const Model generatedModel = embedding_lookup_sparse::createTestModel_float16_4();
+  const std::vector<Request> exampleRequests = createRequests(embedding_lookup_sparse::get_examples_3d_indices_float16());
+  validateModel(generatedModel);
+  validateRequests(generatedModel, exampleRequests);
+}
+
+
 // Generated from: equal.mod.py.
 namespace equal {
 // Generated equal test
diff --git a/runtime/test/generated/examples/depthwise_conv2d_per_channel.example.cpp b/runtime/test/generated/examples/depthwise_conv2d_per_channel.example.cpp
new file mode 100644
index 0000000..a451d5d
--- /dev/null
+++ b/runtime/test/generated/examples/depthwise_conv2d_per_channel.example.cpp
@@ -0,0 +1,362 @@
+// clang-format off
+// Generated file (from: depthwise_conv2d_per_channel.mod.py). Do not edit
+// Returns the function-local static example list (one example) for the "same" variant.
+std::vector<MixedTypedExample>& get_examples_same() {
+  static std::vector<MixedTypedExample> examples = {
+    {  // the only example of this variant
+      .operands = {
+        // Inputs: one index -> values map per data type (field order: TestHarness.h:MixedTyped)
+        {
+          // FLOAT32
+          {},
+          // INT32
+          {},
+          // QUANT8_ASYMM
+          {{0, {4, 16, 4, 32, 4, 64, 4, 128}}},
+          // QUANT16_SYMM
+          {},
+          // FLOAT16
+          {},
+          // BOOL8
+          {},
+          // QUANT8_SYMM_PER_CHANNEL
+          {},
+        },
+        // Outputs: same per-type layout as the inputs
+        {
+          // FLOAT32
+          {},
+          // INT32
+          {},
+          // QUANT8_ASYMM
+          {{0, {8, 48}}},
+          // QUANT16_SYMM
+          {},
+          // FLOAT16
+          {},
+          // BOOL8
+          {},
+          // QUANT8_SYMM_PER_CHANNEL
+          {},
+        }
+      },
+    },
+  };
+  return examples;
+}
+
+// Returns the function-local static example list (one example) for the "same_weight_as_input" variant.
+std::vector<MixedTypedExample>& get_examples_same_weight_as_input() {
+  static std::vector<MixedTypedExample> examples = {
+    {  // the only example of this variant
+      .operands = {
+        // Inputs: one index -> values map per data type (field order: TestHarness.h:MixedTyped)
+        {
+          // FLOAT32
+          {},
+          // INT32
+          {{2, {0, 0}}},
+          // QUANT8_ASYMM
+          {{0, {4, 16, 4, 32, 4, 64, 4, 128}}},
+          // QUANT16_SYMM
+          {},
+          // FLOAT16
+          {},
+          // BOOL8
+          {},
+          // QUANT8_SYMM_PER_CHANNEL
+          {{1, {2, 4, 2, 0, 2, 2, 2, 0}}},
+        },
+        // Outputs: same per-type layout as the inputs
+        {
+          // FLOAT32
+          {},
+          // INT32
+          {},
+          // QUANT8_ASYMM
+          {{0, {8, 48}}},
+          // QUANT16_SYMM
+          {},
+          // FLOAT16
+          {},
+          // BOOL8
+          {},
+          // QUANT8_SYMM_PER_CHANNEL
+          {},
+        }
+      },
+    },
+  };
+  return examples;
+}
+
+// Returns the function-local static example list (one example) for the "different" variant.
+std::vector<MixedTypedExample>& get_examples_different() {
+  static std::vector<MixedTypedExample> examples = {
+    {  // the only example of this variant
+      .operands = {
+        // Inputs: one index -> values map per data type (field order: TestHarness.h:MixedTyped)
+        {
+          // FLOAT32
+          {},
+          // INT32
+          {},
+          // QUANT8_ASYMM
+          {{0, {129, 130, 129, 130, 129, 130, 129, 130, 129, 130, 129, 130, 129, 130, 129, 130, 129, 130}}},
+          // QUANT16_SYMM
+          {},
+          // FLOAT16
+          {},
+          // BOOL8
+          {},
+          // QUANT8_SYMM_PER_CHANNEL
+          {},
+        },
+        // Outputs: same per-type layout as the inputs
+        {
+          // FLOAT32
+          {},
+          // INT32
+          {},
+          // QUANT8_ASYMM
+          {{0, {132, 130, 128, 128, 132, 130, 129, 128}}},
+          // QUANT16_SYMM
+          {},
+          // FLOAT16
+          {},
+          // BOOL8
+          {},
+          // QUANT8_SYMM_PER_CHANNEL
+          {},
+        }
+      },
+    },
+  };
+  return examples;
+}
+
+// Returns the function-local static example list (one example) for the "different_weight_as_input" variant.
+std::vector<MixedTypedExample>& get_examples_different_weight_as_input() {
+  static std::vector<MixedTypedExample> examples = {
+    {  // the only example of this variant
+      .operands = {
+        // Inputs: one index -> values map per data type (field order: TestHarness.h:MixedTyped)
+        {
+          // FLOAT32
+          {},
+          // INT32
+          {{2, {4, 4}}},
+          // QUANT8_ASYMM
+          {{0, {129, 130, 129, 130, 129, 130, 129, 130, 129, 130, 129, 130, 129, 130, 129, 130, 129, 130}}},
+          // QUANT16_SYMM
+          {},
+          // FLOAT16
+          {},
+          // BOOL8
+          {},
+          // QUANT8_SYMM_PER_CHANNEL
+          {{1, {1, 1, 1, 1, 1, 1, 1, 1}}},
+        },
+        // Outputs: same per-type layout as the inputs
+        {
+          // FLOAT32
+          {},
+          // INT32
+          {},
+          // QUANT8_ASYMM
+          {{0, {132, 130, 128, 128, 132, 130, 129, 128}}},
+          // QUANT16_SYMM
+          {},
+          // FLOAT16
+          {},
+          // BOOL8
+          {},
+          // QUANT8_SYMM_PER_CHANNEL
+          {},
+        }
+      },
+    },
+  };
+  return examples;
+}
+
+// Returns the function-local static example list (one example) for the "layout_nhwc" variant.
+std::vector<MixedTypedExample>& get_examples_layout_nhwc() {
+  static std::vector<MixedTypedExample> examples = {
+    {  // the only example of this variant
+      .operands = {
+        // Inputs: one index -> values map per data type (field order: TestHarness.h:MixedTyped)
+        {
+          // FLOAT32
+          {},
+          // INT32
+          {},
+          // QUANT8_ASYMM
+          {{0, {129, 130, 129, 130, 129, 130, 129, 130, 129, 130, 129, 130, 129, 130, 129, 130, 129, 130}}},
+          // QUANT16_SYMM
+          {},
+          // FLOAT16
+          {},
+          // BOOL8
+          {},
+          // QUANT8_SYMM_PER_CHANNEL
+          {},
+        },
+        // Outputs: same per-type layout as the inputs
+        {
+          // FLOAT32
+          {},
+          // INT32
+          {},
+          // QUANT8_ASYMM
+          {{0, {132, 130, 128, 128, 132, 130, 129, 128}}},
+          // QUANT16_SYMM
+          {},
+          // FLOAT16
+          {},
+          // BOOL8
+          {},
+          // QUANT8_SYMM_PER_CHANNEL
+          {},
+        }
+      },
+    },
+  };
+  return examples;
+}
+
+// Returns the function-local static example list (one example) for the "layout_nhwc_weight_as_input" variant.
+std::vector<MixedTypedExample>& get_examples_layout_nhwc_weight_as_input() {
+  static std::vector<MixedTypedExample> examples = {
+    {  // the only example of this variant
+      .operands = {
+        // Inputs: one index -> values map per data type (field order: TestHarness.h:MixedTyped)
+        {
+          // FLOAT32
+          {},
+          // INT32
+          {{2, {4, 4}}},
+          // QUANT8_ASYMM
+          {{0, {129, 130, 129, 130, 129, 130, 129, 130, 129, 130, 129, 130, 129, 130, 129, 130, 129, 130}}},
+          // QUANT16_SYMM
+          {},
+          // FLOAT16
+          {},
+          // BOOL8
+          {},
+          // QUANT8_SYMM_PER_CHANNEL
+          {{1, {1, 1, 1, 1, 1, 1, 1, 1}}},
+        },
+        // Outputs: same per-type layout as the inputs
+        {
+          // FLOAT32
+          {},
+          // INT32
+          {},
+          // QUANT8_ASYMM
+          {{0, {132, 130, 128, 128, 132, 130, 129, 128}}},
+          // QUANT16_SYMM
+          {},
+          // FLOAT16
+          {},
+          // BOOL8
+          {},
+          // QUANT8_SYMM_PER_CHANNEL
+          {},
+        }
+      },
+    },
+  };
+  return examples;
+}
+
+// Returns the function-local static example list (one example) for the "layout_nchw" variant.
+std::vector<MixedTypedExample>& get_examples_layout_nchw() {
+  static std::vector<MixedTypedExample> examples = {
+    {  // the only example of this variant
+      .operands = {
+        // Inputs: one index -> values map per data type (field order: TestHarness.h:MixedTyped)
+        {
+          // FLOAT32
+          {},
+          // INT32
+          {},
+          // QUANT8_ASYMM
+          {{0, {129, 129, 129, 129, 129, 129, 129, 129, 129, 130, 130, 130, 130, 130, 130, 130, 130, 130}}},
+          // QUANT16_SYMM
+          {},
+          // FLOAT16
+          {},
+          // BOOL8
+          {},
+          // QUANT8_SYMM_PER_CHANNEL
+          {},
+        },
+        // Outputs: same per-type layout as the inputs
+        {
+          // FLOAT32
+          {},
+          // INT32
+          {},
+          // QUANT8_ASYMM
+          {{0, {132, 128, 132, 129, 130, 128, 130, 128}}},
+          // QUANT16_SYMM
+          {},
+          // FLOAT16
+          {},
+          // BOOL8
+          {},
+          // QUANT8_SYMM_PER_CHANNEL
+          {},
+        }
+      },
+    },
+  };
+  return examples;
+}
+
+// Returns the function-local static example list (one example) for the "layout_nchw_weight_as_input" variant.
+std::vector<MixedTypedExample>& get_examples_layout_nchw_weight_as_input() {
+  static std::vector<MixedTypedExample> examples = {
+    {  // the only example of this variant
+      .operands = {
+        // Inputs: one index -> values map per data type (field order: TestHarness.h:MixedTyped)
+        {
+          // FLOAT32
+          {},
+          // INT32
+          {{2, {4, 4}}},
+          // QUANT8_ASYMM
+          {{0, {129, 129, 129, 129, 129, 129, 129, 129, 129, 130, 130, 130, 130, 130, 130, 130, 130, 130}}},
+          // QUANT16_SYMM
+          {},
+          // FLOAT16
+          {},
+          // BOOL8
+          {},
+          // QUANT8_SYMM_PER_CHANNEL
+          {{1, {1, 1, 1, 1, 1, 1, 1, 1}}},
+        },
+        // Outputs: same per-type layout as the inputs
+        {
+          // FLOAT32
+          {},
+          // INT32
+          {},
+          // QUANT8_ASYMM
+          {{0, {132, 128, 132, 129, 130, 128, 130, 128}}},
+          // QUANT16_SYMM
+          {},
+          // FLOAT16
+          {},
+          // BOOL8
+          {},
+          // QUANT8_SYMM_PER_CHANNEL
+          {},
+        }
+      },
+    },
+  };
+  return examples;
+}
+
diff --git a/runtime/test/generated/models/depthwise_conv2d_per_channel.model.cpp b/runtime/test/generated/models/depthwise_conv2d_per_channel.model.cpp
new file mode 100644
index 0000000..092bd87
--- /dev/null
+++ b/runtime/test/generated/models/depthwise_conv2d_per_channel.model.cpp
@@ -0,0 +1,426 @@
+// clang-format off
+// Generated file (from: depthwise_conv2d_per_channel.mod.py). Do not edit
+// Builds the "same" model: a single DEPTHWISE_CONV_2D whose per-channel filter and bias are constants.
+// Scalar inputs follow the NNAPI explicit-padding order: pad l/r/t/b, stride w/h, depth multiplier, activation.
+void CreateModel_same(Model *model) {
+  OperandType type0(Type::TENSOR_QUANT8_ASYMM, {1, 2, 2, 2}, 0.5f, 0);
+  OperandType type1(Type::TENSOR_QUANT8_SYMM_PER_CHANNEL, {1, 2, 2, 2}, 0.0f, 0, SymmPerChannelQuantParams({0.5f, 0.5f},3));
+  OperandType type2(Type::TENSOR_INT32, {2});
+  OperandType type3(Type::TENSOR_QUANT8_ASYMM, {1, 1, 1, 2}, 1.0f, 0);
+  OperandType type4(Type::INT32, {});
+  // Step 1: declare the operands
+  auto op1 = model->addOperand(&type0);
+  auto op2 = model->addOperand(&type1);
+  auto op3 = model->addOperand(&type2);
+  auto param = model->addOperand(&type4);
+  auto param1 = model->addOperand(&type4);
+  auto param2 = model->addOperand(&type4);
+  auto param3 = model->addOperand(&type4);
+  auto param4 = model->addOperand(&type4);
+  auto param5 = model->addOperand(&type4);
+  auto param6 = model->addOperand(&type4);
+  auto param7 = model->addOperand(&type4);
+  auto op4 = model->addOperand(&type3);
+  // Step 2: constant operand values (sized from the backing arrays), then the operation
+  static int8_t op2_init[] = {2, 4, 2, 0, 2, 2, 2, 0};
+  model->setOperandValue(op2, op2_init, sizeof(op2_init));
+  static int32_t op3_init[] = {0, 0};
+  model->setOperandValue(op3, op3_init, sizeof(op3_init));
+  static int32_t param_init[] = {0};
+  model->setOperandValue(param, param_init, sizeof(param_init));
+  static int32_t param1_init[] = {0};
+  model->setOperandValue(param1, param1_init, sizeof(param1_init));
+  static int32_t param2_init[] = {0};
+  model->setOperandValue(param2, param2_init, sizeof(param2_init));
+  static int32_t param3_init[] = {0};
+  model->setOperandValue(param3, param3_init, sizeof(param3_init));
+  static int32_t param4_init[] = {1};
+  model->setOperandValue(param4, param4_init, sizeof(param4_init));
+  static int32_t param5_init[] = {1};
+  model->setOperandValue(param5, param5_init, sizeof(param5_init));
+  static int32_t param6_init[] = {1};
+  model->setOperandValue(param6, param6_init, sizeof(param6_init));
+  static int32_t param7_init[] = {0};
+  model->setOperandValue(param7, param7_init, sizeof(param7_init));
+  model->addOperation(ANEURALNETWORKS_DEPTHWISE_CONV_2D, {op1, op2, op3, param, param1, param2, param3, param4, param5, param6, param7}, {op4});
+  // Step 3: model inputs and outputs
+  model->identifyInputsAndOutputs({op1}, {op4});
+  assert(model->isValid());
+}
+
+inline bool is_ignored_same(int i) {  // true iff i is in the ignore set (empty here)
+  static const std::set<int> skipped;
+  return skipped.count(i) != 0;
+}
+
+// Builds the "same" model with filter (op2) and bias (op3) as model inputs instead of constants.
+// Scalar inputs follow the NNAPI explicit-padding order: pad l/r/t/b, stride w/h, depth multiplier, activation.
+void CreateModel_same_weight_as_input(Model *model) {
+  OperandType type0(Type::TENSOR_QUANT8_ASYMM, {1, 2, 2, 2}, 0.5f, 0);
+  OperandType type10(Type::TENSOR_QUANT8_SYMM_PER_CHANNEL, {1, 2, 2, 2}, 0.0f, 0, SymmPerChannelQuantParams({0.5f, 0.5f},3));
+  OperandType type2(Type::TENSOR_INT32, {2});
+  OperandType type3(Type::TENSOR_QUANT8_ASYMM, {1, 1, 1, 2}, 1.0f, 0);
+  OperandType type4(Type::INT32, {});
+  // Step 1: declare the operands
+  auto op1 = model->addOperand(&type0);
+  auto op2 = model->addOperand(&type10);
+  auto op3 = model->addOperand(&type2);
+  auto param = model->addOperand(&type4);
+  auto param1 = model->addOperand(&type4);
+  auto param2 = model->addOperand(&type4);
+  auto param3 = model->addOperand(&type4);
+  auto param4 = model->addOperand(&type4);
+  auto param5 = model->addOperand(&type4);
+  auto param6 = model->addOperand(&type4);
+  auto param7 = model->addOperand(&type4);
+  auto op4 = model->addOperand(&type3);
+  // Step 2: constant scalar values (sized from the backing arrays), then the operation
+  static int32_t param_init[] = {0};
+  model->setOperandValue(param, param_init, sizeof(param_init));
+  static int32_t param1_init[] = {0};
+  model->setOperandValue(param1, param1_init, sizeof(param1_init));
+  static int32_t param2_init[] = {0};
+  model->setOperandValue(param2, param2_init, sizeof(param2_init));
+  static int32_t param3_init[] = {0};
+  model->setOperandValue(param3, param3_init, sizeof(param3_init));
+  static int32_t param4_init[] = {1};
+  model->setOperandValue(param4, param4_init, sizeof(param4_init));
+  static int32_t param5_init[] = {1};
+  model->setOperandValue(param5, param5_init, sizeof(param5_init));
+  static int32_t param6_init[] = {1};
+  model->setOperandValue(param6, param6_init, sizeof(param6_init));
+  static int32_t param7_init[] = {0};
+  model->setOperandValue(param7, param7_init, sizeof(param7_init));
+  model->addOperation(ANEURALNETWORKS_DEPTHWISE_CONV_2D, {op1, op2, op3, param, param1, param2, param3, param4, param5, param6, param7}, {op4});
+  // Step 3: model inputs and outputs
+  model->identifyInputsAndOutputs({op1, op2, op3}, {op4});
+  assert(model->isValid());
+}
+
+inline bool is_ignored_same_weight_as_input(int i) {  // true iff i is in the ignore set (empty here)
+  static const std::set<int> skipped;
+  return skipped.count(i) != 0;
+}
+
+// Builds the "different" model: one DEPTHWISE_CONV_2D with constant filter/bias and per-channel scales {1.0, 0.5}.
+// Scalar inputs follow the NNAPI explicit-padding order: pad l/r/t/b, stride w/h, depth multiplier, activation.
+void CreateModel_different(Model *model) {
+  OperandType type2(Type::TENSOR_INT32, {2});
+  OperandType type4(Type::INT32, {});
+  OperandType type5(Type::TENSOR_QUANT8_ASYMM, {1, 3, 3, 2}, 0.5f, 128);
+  OperandType type6(Type::TENSOR_QUANT8_SYMM_PER_CHANNEL, {1, 2, 2, 2}, 0.0f, 0, SymmPerChannelQuantParams({1.0f, 0.5f},3));
+  OperandType type7(Type::TENSOR_QUANT8_ASYMM, {1, 2, 2, 2}, 1.0f, 128);
+  // Step 1: declare the operands
+  auto op11 = model->addOperand(&type5);
+  auto op21 = model->addOperand(&type6);
+  auto op31 = model->addOperand(&type2);
+  auto param8 = model->addOperand(&type4);
+  auto param9 = model->addOperand(&type4);
+  auto param10 = model->addOperand(&type4);
+  auto param11 = model->addOperand(&type4);
+  auto param12 = model->addOperand(&type4);
+  auto param13 = model->addOperand(&type4);
+  auto param14 = model->addOperand(&type4);
+  auto param15 = model->addOperand(&type4);
+  auto op41 = model->addOperand(&type7);
+  // Step 2: constant operand values (sized from the backing arrays), then the operation
+  static int8_t op21_init[] = {1, 1, 1, 1, 1, 1, 1, 1};
+  model->setOperandValue(op21, op21_init, sizeof(op21_init));
+  static int32_t op31_init[] = {4, 4};
+  model->setOperandValue(op31, op31_init, sizeof(op31_init));
+  static int32_t param8_init[] = {0};
+  model->setOperandValue(param8, param8_init, sizeof(param8_init));
+  static int32_t param9_init[] = {0};
+  model->setOperandValue(param9, param9_init, sizeof(param9_init));
+  static int32_t param10_init[] = {0};
+  model->setOperandValue(param10, param10_init, sizeof(param10_init));
+  static int32_t param11_init[] = {0};
+  model->setOperandValue(param11, param11_init, sizeof(param11_init));
+  static int32_t param12_init[] = {1};
+  model->setOperandValue(param12, param12_init, sizeof(param12_init));
+  static int32_t param13_init[] = {1};
+  model->setOperandValue(param13, param13_init, sizeof(param13_init));
+  static int32_t param14_init[] = {2};
+  model->setOperandValue(param14, param14_init, sizeof(param14_init));
+  static int32_t param15_init[] = {0};
+  model->setOperandValue(param15, param15_init, sizeof(param15_init));
+  model->addOperation(ANEURALNETWORKS_DEPTHWISE_CONV_2D, {op11, op21, op31, param8, param9, param10, param11, param12, param13, param14, param15}, {op41});
+  // Step 3: model inputs and outputs
+  model->identifyInputsAndOutputs({op11}, {op41});
+  assert(model->isValid());
+}
+
+inline bool is_ignored_different(int i) {  // true iff i is in the ignore set (empty here)
+  static const std::set<int> skipped;
+  return skipped.count(i) != 0;
+}
+
+// Builds the "different" model with filter (op21) and bias (op31) as model inputs instead of constants.
+// Scalar inputs follow the NNAPI explicit-padding order: pad l/r/t/b, stride w/h, depth multiplier, activation.
+void CreateModel_different_weight_as_input(Model *model) {
+  OperandType type11(Type::TENSOR_QUANT8_SYMM_PER_CHANNEL, {1, 2, 2, 2}, 0.0f, 0, SymmPerChannelQuantParams({1.0f, 0.5f},3));
+  OperandType type2(Type::TENSOR_INT32, {2});
+  OperandType type4(Type::INT32, {});
+  OperandType type5(Type::TENSOR_QUANT8_ASYMM, {1, 3, 3, 2}, 0.5f, 128);
+  OperandType type7(Type::TENSOR_QUANT8_ASYMM, {1, 2, 2, 2}, 1.0f, 128);
+  // Step 1: declare the operands
+  auto op11 = model->addOperand(&type5);
+  auto op21 = model->addOperand(&type11);
+  auto op31 = model->addOperand(&type2);
+  auto param8 = model->addOperand(&type4);
+  auto param9 = model->addOperand(&type4);
+  auto param10 = model->addOperand(&type4);
+  auto param11 = model->addOperand(&type4);
+  auto param12 = model->addOperand(&type4);
+  auto param13 = model->addOperand(&type4);
+  auto param14 = model->addOperand(&type4);
+  auto param15 = model->addOperand(&type4);
+  auto op41 = model->addOperand(&type7);
+  // Step 2: constant scalar values (sized from the backing arrays), then the operation
+  static int32_t param8_init[] = {0};
+  model->setOperandValue(param8, param8_init, sizeof(param8_init));
+  static int32_t param9_init[] = {0};
+  model->setOperandValue(param9, param9_init, sizeof(param9_init));
+  static int32_t param10_init[] = {0};
+  model->setOperandValue(param10, param10_init, sizeof(param10_init));
+  static int32_t param11_init[] = {0};
+  model->setOperandValue(param11, param11_init, sizeof(param11_init));
+  static int32_t param12_init[] = {1};
+  model->setOperandValue(param12, param12_init, sizeof(param12_init));
+  static int32_t param13_init[] = {1};
+  model->setOperandValue(param13, param13_init, sizeof(param13_init));
+  static int32_t param14_init[] = {2};
+  model->setOperandValue(param14, param14_init, sizeof(param14_init));
+  static int32_t param15_init[] = {0};
+  model->setOperandValue(param15, param15_init, sizeof(param15_init));
+  model->addOperation(ANEURALNETWORKS_DEPTHWISE_CONV_2D, {op11, op21, op31, param8, param9, param10, param11, param12, param13, param14, param15}, {op41});
+  // Step 3: model inputs and outputs
+  model->identifyInputsAndOutputs({op11, op21, op31}, {op41});
+  assert(model->isValid());
+}
+
+inline bool is_ignored_different_weight_as_input(int i) {  // true iff i is in the ignore set (empty here)
+  static const std::set<int> skipped;
+  return skipped.count(i) != 0;
+}
+
+// Builds the "layout_nhwc" model: DEPTHWISE_CONV_2D with constant filter/bias and a trailing BOOL layout operand set to false.
+// Scalar inputs follow the NNAPI explicit-padding order: pad l/r/t/b, stride w/h, depth multiplier, activation, then layout.
+void CreateModel_layout_nhwc(Model *model) {
+  OperandType type2(Type::TENSOR_INT32, {2});
+  OperandType type4(Type::INT32, {});
+  OperandType type5(Type::TENSOR_QUANT8_ASYMM, {1, 3, 3, 2}, 0.5f, 128);
+  OperandType type7(Type::TENSOR_QUANT8_ASYMM, {1, 2, 2, 2}, 1.0f, 128);
+  OperandType type8(Type::BOOL, {});
+  OperandType type9(Type::TENSOR_QUANT8_SYMM_PER_CHANNEL, {1, 2, 2, 2}, 0.0f, 0, SymmPerChannelQuantParams({1.0f, 0.5f},3));
+  // Step 1: declare the operands
+  auto op12 = model->addOperand(&type5);
+  auto op22 = model->addOperand(&type9);
+  auto op32 = model->addOperand(&type2);
+  auto param16 = model->addOperand(&type4);
+  auto param17 = model->addOperand(&type4);
+  auto param18 = model->addOperand(&type4);
+  auto param19 = model->addOperand(&type4);
+  auto param20 = model->addOperand(&type4);
+  auto param21 = model->addOperand(&type4);
+  auto param22 = model->addOperand(&type4);
+  auto param23 = model->addOperand(&type4);
+  auto layout = model->addOperand(&type8);
+  auto op42 = model->addOperand(&type7);
+  // Step 2: constant operand values (sized from the backing arrays), then the operation
+  static int8_t op22_init[] = {1, 1, 1, 1, 1, 1, 1, 1};
+  model->setOperandValue(op22, op22_init, sizeof(op22_init));
+  static int32_t op32_init[] = {4, 4};
+  model->setOperandValue(op32, op32_init, sizeof(op32_init));
+  static int32_t param16_init[] = {0};
+  model->setOperandValue(param16, param16_init, sizeof(param16_init));
+  static int32_t param17_init[] = {0};
+  model->setOperandValue(param17, param17_init, sizeof(param17_init));
+  static int32_t param18_init[] = {0};
+  model->setOperandValue(param18, param18_init, sizeof(param18_init));
+  static int32_t param19_init[] = {0};
+  model->setOperandValue(param19, param19_init, sizeof(param19_init));
+  static int32_t param20_init[] = {1};
+  model->setOperandValue(param20, param20_init, sizeof(param20_init));
+  static int32_t param21_init[] = {1};
+  model->setOperandValue(param21, param21_init, sizeof(param21_init));
+  static int32_t param22_init[] = {2};
+  model->setOperandValue(param22, param22_init, sizeof(param22_init));
+  static int32_t param23_init[] = {0};
+  model->setOperandValue(param23, param23_init, sizeof(param23_init));
+  static bool8 layout_init[] = {false};
+  model->setOperandValue(layout, layout_init, sizeof(layout_init));
+  model->addOperation(ANEURALNETWORKS_DEPTHWISE_CONV_2D, {op12, op22, op32, param16, param17, param18, param19, param20, param21, param22, param23, layout}, {op42});
+  // Step 3: model inputs and outputs
+  model->identifyInputsAndOutputs({op12}, {op42});
+  assert(model->isValid());
+}
+
+inline bool is_ignored_layout_nhwc(int i) {  // true iff i is in the ignore set (empty here)
+  static const std::set<int> skipped;
+  return skipped.count(i) != 0;
+}
+
+// Builds the "layout_nhwc" model with filter (op22) and bias (op32) as model inputs; the BOOL layout operand is false.
+// Scalar inputs follow the NNAPI explicit-padding order: pad l/r/t/b, stride w/h, depth multiplier, activation, then layout.
+void CreateModel_layout_nhwc_weight_as_input(Model *model) {
+  OperandType type12(Type::TENSOR_QUANT8_SYMM_PER_CHANNEL, {1, 2, 2, 2}, 0.0f, 0, SymmPerChannelQuantParams({1.0f, 0.5f},3));
+  OperandType type2(Type::TENSOR_INT32, {2});
+  OperandType type4(Type::INT32, {});
+  OperandType type5(Type::TENSOR_QUANT8_ASYMM, {1, 3, 3, 2}, 0.5f, 128);
+  OperandType type7(Type::TENSOR_QUANT8_ASYMM, {1, 2, 2, 2}, 1.0f, 128);
+  OperandType type8(Type::BOOL, {});
+  // Step 1: declare the operands
+  auto op12 = model->addOperand(&type5);
+  auto op22 = model->addOperand(&type12);
+  auto op32 = model->addOperand(&type2);
+  auto param16 = model->addOperand(&type4);
+  auto param17 = model->addOperand(&type4);
+  auto param18 = model->addOperand(&type4);
+  auto param19 = model->addOperand(&type4);
+  auto param20 = model->addOperand(&type4);
+  auto param21 = model->addOperand(&type4);
+  auto param22 = model->addOperand(&type4);
+  auto param23 = model->addOperand(&type4);
+  auto layout = model->addOperand(&type8);
+  auto op42 = model->addOperand(&type7);
+  // Step 2: constant scalar values (sized from the backing arrays), then the operation
+  static int32_t param16_init[] = {0};
+  model->setOperandValue(param16, param16_init, sizeof(param16_init));
+  static int32_t param17_init[] = {0};
+  model->setOperandValue(param17, param17_init, sizeof(param17_init));
+  static int32_t param18_init[] = {0};
+  model->setOperandValue(param18, param18_init, sizeof(param18_init));
+  static int32_t param19_init[] = {0};
+  model->setOperandValue(param19, param19_init, sizeof(param19_init));
+  static int32_t param20_init[] = {1};
+  model->setOperandValue(param20, param20_init, sizeof(param20_init));
+  static int32_t param21_init[] = {1};
+  model->setOperandValue(param21, param21_init, sizeof(param21_init));
+  static int32_t param22_init[] = {2};
+  model->setOperandValue(param22, param22_init, sizeof(param22_init));
+  static int32_t param23_init[] = {0};
+  model->setOperandValue(param23, param23_init, sizeof(param23_init));
+  static bool8 layout_init[] = {false};
+  model->setOperandValue(layout, layout_init, sizeof(layout_init));
+  model->addOperation(ANEURALNETWORKS_DEPTHWISE_CONV_2D, {op12, op22, op32, param16, param17, param18, param19, param20, param21, param22, param23, layout}, {op42});
+  // Step 3: model inputs and outputs
+  model->identifyInputsAndOutputs({op12, op22, op32}, {op42});
+  assert(model->isValid());
+}
+
+inline bool is_ignored_layout_nhwc_weight_as_input(int i) {  // Membership test against the ignore set of the layout_nhwc_weight_as_input variant.
+  static std::set<int> ignore = {};  // Empty, so the lookup below yields false for every i.
+  return ignore.find(i) != ignore.end();  // presumably i is an output index to skip when comparing results — confirm against the test harness
+}
+
+void CreateModel_layout_nchw(Model *model) {  // Fills *model with a single DEPTHWISE_CONV_2D for the layout_nchw variant; op22 and op32 are constants here.
+  OperandType type13(Type::TENSOR_QUANT8_ASYMM, {1, 2, 3, 3}, 0.5f, 128);  // op12: dims {1, 2, 3, 3}, scale 0.5, zero point 128
+  OperandType type2(Type::TENSOR_INT32, {2});  // op32: two int32 elements
+  OperandType type4(Type::INT32, {});  // shared by the eight scalar parameters param16..param23
+  OperandType type7(Type::TENSOR_QUANT8_ASYMM, {1, 2, 2, 2}, 1.0f, 128);  // op42 (output): scale 1.0, zero point 128
+  OperandType type8(Type::BOOL, {});  // layout flag
+  OperandType type9(Type::TENSOR_QUANT8_SYMM_PER_CHANNEL, {1, 2, 2, 2}, 0.0f, 0, SymmPerChannelQuantParams({1.0f, 0.5f},3));  // op22: per-channel scales {1.0, 0.5} along dimension 3; tensor-level scale left at 0
+  // Phase 1, operands (added in the same order the operation lists them, output last)
+  auto op12 = model->addOperand(&type13);
+  auto op22 = model->addOperand(&type9);
+  auto op32 = model->addOperand(&type2);
+  auto param16 = model->addOperand(&type4);
+  auto param17 = model->addOperand(&type4);
+  auto param18 = model->addOperand(&type4);
+  auto param19 = model->addOperand(&type4);
+  auto param20 = model->addOperand(&type4);
+  auto param21 = model->addOperand(&type4);
+  auto param22 = model->addOperand(&type4);
+  auto param23 = model->addOperand(&type4);
+  auto layout = model->addOperand(&type8);
+  auto op42 = model->addOperand(&type7);
+  // Phase 2, operations (constant values are bound first, then the operation itself is added)
+  static int8_t op22_init[] = {1, 1, 1, 1, 1, 1, 1, 1};  // eight int8 values, all 1
+  model->setOperandValue(op22, op22_init, sizeof(int8_t) * 8);
+  static int32_t op32_init[] = {4, 4};  // two int32 values, both 4
+  model->setOperandValue(op32, op32_init, sizeof(int32_t) * 2);
+  static int32_t param16_init[] = {0};  // param16..param19 are all 0; presumably the four explicit paddings — confirm against the DEPTHWISE_CONV_2D signature
+  model->setOperandValue(param16, param16_init, sizeof(int32_t) * 1);
+  static int32_t param17_init[] = {0};
+  model->setOperandValue(param17, param17_init, sizeof(int32_t) * 1);
+  static int32_t param18_init[] = {0};
+  model->setOperandValue(param18, param18_init, sizeof(int32_t) * 1);
+  static int32_t param19_init[] = {0};
+  model->setOperandValue(param19, param19_init, sizeof(int32_t) * 1);
+  static int32_t param20_init[] = {1};  // param20 and param21 are both 1; presumably the two strides — confirm
+  model->setOperandValue(param20, param20_init, sizeof(int32_t) * 1);
+  static int32_t param21_init[] = {1};
+  model->setOperandValue(param21, param21_init, sizeof(int32_t) * 1);
+  static int32_t param22_init[] = {2};  // NOTE(review): presumably the depth multiplier; if so, a 2-channel input x 2 would imply 4 output channels while op22/op42 carry 2 — confirm against the .mod.py spec
+  model->setOperandValue(param22, param22_init, sizeof(int32_t) * 1);
+  static int32_t param23_init[] = {0};  // presumably the fused activation selector — confirm
+  model->setOperandValue(param23, param23_init, sizeof(int32_t) * 1);
+  static bool8 layout_init[] = {true};  // layout flag set to true for this variant; presumably selects NCHW, per the function name — confirm
+  model->setOperandValue(layout, layout_init, sizeof(bool8) * 1);
+  model->addOperation(ANEURALNETWORKS_DEPTHWISE_CONV_2D, {op12, op22, op32, param16, param17, param18, param19, param20, param21, param22, param23, layout}, {op42});
+  // Phase 3, inputs and outputs (only op12 is a model input; op22 and op32 were given constant values above)
+  model->identifyInputsAndOutputs(
+    {op12},
+    {op42});
+  assert(model->isValid());
+}
+
+inline bool is_ignored_layout_nchw(int i) {  // Membership test against the ignore set of the layout_nchw variant.
+  static std::set<int> ignore = {};  // Empty, so the lookup below yields false for every i.
+  return ignore.find(i) != ignore.end();  // presumably i is an output index to skip when comparing results — confirm against the test harness
+}
+
+void CreateModel_layout_nchw_weight_as_input(Model *model) {  // Fills *model with a single DEPTHWISE_CONV_2D for the layout_nchw variant; op22 and op32 are model inputs here, not constants.
+  OperandType type13(Type::TENSOR_QUANT8_ASYMM, {1, 2, 3, 3}, 0.5f, 128);  // op12: dims {1, 2, 3, 3}, scale 0.5, zero point 128
+  OperandType type14(Type::TENSOR_QUANT8_SYMM_PER_CHANNEL, {1, 2, 2, 2}, 0.0f, 0, SymmPerChannelQuantParams({1.0f, 0.5f},3));  // op22: per-channel scales {1.0, 0.5} along dimension 3; tensor-level scale left at 0
+  OperandType type2(Type::TENSOR_INT32, {2});  // op32: two int32 elements
+  OperandType type4(Type::INT32, {});  // shared by the eight scalar parameters param16..param23
+  OperandType type7(Type::TENSOR_QUANT8_ASYMM, {1, 2, 2, 2}, 1.0f, 128);  // op42 (output): scale 1.0, zero point 128
+  OperandType type8(Type::BOOL, {});  // layout flag
+  // Phase 1, operands (added in the same order the operation lists them, output last)
+  auto op12 = model->addOperand(&type13);
+  auto op22 = model->addOperand(&type14);
+  auto op32 = model->addOperand(&type2);
+  auto param16 = model->addOperand(&type4);
+  auto param17 = model->addOperand(&type4);
+  auto param18 = model->addOperand(&type4);
+  auto param19 = model->addOperand(&type4);
+  auto param20 = model->addOperand(&type4);
+  auto param21 = model->addOperand(&type4);
+  auto param22 = model->addOperand(&type4);
+  auto param23 = model->addOperand(&type4);
+  auto layout = model->addOperand(&type8);
+  auto op42 = model->addOperand(&type7);
+  // Phase 2, operations (only the scalars and the layout flag get constant values; op22 and op32 get none)
+  static int32_t param16_init[] = {0};  // param16..param19 are all 0; presumably the four explicit paddings — confirm against the DEPTHWISE_CONV_2D signature
+  model->setOperandValue(param16, param16_init, sizeof(int32_t) * 1);
+  static int32_t param17_init[] = {0};
+  model->setOperandValue(param17, param17_init, sizeof(int32_t) * 1);
+  static int32_t param18_init[] = {0};
+  model->setOperandValue(param18, param18_init, sizeof(int32_t) * 1);
+  static int32_t param19_init[] = {0};
+  model->setOperandValue(param19, param19_init, sizeof(int32_t) * 1);
+  static int32_t param20_init[] = {1};  // param20 and param21 are both 1; presumably the two strides — confirm
+  model->setOperandValue(param20, param20_init, sizeof(int32_t) * 1);
+  static int32_t param21_init[] = {1};
+  model->setOperandValue(param21, param21_init, sizeof(int32_t) * 1);
+  static int32_t param22_init[] = {2};  // NOTE(review): presumably the depth multiplier; if so, a 2-channel input x 2 would imply 4 output channels while op22/op42 carry 2 — confirm against the .mod.py spec
+  model->setOperandValue(param22, param22_init, sizeof(int32_t) * 1);
+  static int32_t param23_init[] = {0};  // presumably the fused activation selector — confirm
+  model->setOperandValue(param23, param23_init, sizeof(int32_t) * 1);
+  static bool8 layout_init[] = {true};  // layout flag set to true for this variant; presumably selects NCHW, per the function name — confirm
+  model->setOperandValue(layout, layout_init, sizeof(bool8) * 1);
+  model->addOperation(ANEURALNETWORKS_DEPTHWISE_CONV_2D, {op12, op22, op32, param16, param17, param18, param19, param20, param21, param22, param23, layout}, {op42});
+  // Phase 3, inputs and outputs (op12, op22 and op32 are all supplied by the caller at execution time)
+  model->identifyInputsAndOutputs(
+    {op12, op22, op32},
+    {op42});
+  assert(model->isValid());
+}
+
+inline bool is_ignored_layout_nchw_weight_as_input(int i) {  // Membership test against the ignore set of the layout_nchw_weight_as_input variant.
+  static std::set<int> ignore = {};  // Empty, so the lookup below yields false for every i.
+  return ignore.find(i) != ignore.end();  // presumably i is an output index to skip when comparing results — confirm against the test harness
+}
+
diff --git a/runtime/test/generated/tests/depthwise_conv2d_per_channel.mod.py.cpp b/runtime/test/generated/tests/depthwise_conv2d_per_channel.mod.py.cpp
new file mode 100644
index 0000000..6bc8f60
--- /dev/null
+++ b/runtime/test/generated/tests/depthwise_conv2d_per_channel.mod.py.cpp
@@ -0,0 +1,59 @@
+// clang-format off
+// Generated file (from: depthwise_conv2d_per_channel.mod.py). Do not edit
+#include "../../TestGenerated.h"
+
+namespace depthwise_conv2d_per_channel {  // Scopes the textually included generated code so the TEST_F bodies below can qualify its symbols with this name.
+// Generated depthwise_conv2d_per_channel test examples (presumably defines the get_examples_* functions used below — confirm)
+#include "generated/examples/depthwise_conv2d_per_channel.example.cpp"
+// Generated model constructor (presumably defines the CreateModel_* and is_ignored_* functions used below — confirm)
+#include "generated/models/depthwise_conv2d_per_channel.model.cpp"
+} // namespace depthwise_conv2d_per_channel
+
+TEST_F(GeneratedTests, depthwise_conv2d_per_channel_same) {  // "same" variant; presumably execute() builds the model, runs it and checks it against the examples — confirm in TestGenerated.h
+    execute(depthwise_conv2d_per_channel::CreateModel_same,  // model-building function
+            depthwise_conv2d_per_channel::is_ignored_same,  // ignore predicate
+            depthwise_conv2d_per_channel::get_examples_same());  // value returned by get_examples_same(), called here
+}
+
+TEST_F(GeneratedTests, depthwise_conv2d_per_channel_same_weight_as_input) {  // "same_weight_as_input" variant; presumably execute() builds the model, runs it and checks it against the examples — confirm in TestGenerated.h
+    execute(depthwise_conv2d_per_channel::CreateModel_same_weight_as_input,  // model-building function
+            depthwise_conv2d_per_channel::is_ignored_same_weight_as_input,  // ignore predicate
+            depthwise_conv2d_per_channel::get_examples_same_weight_as_input());  // value returned by get_examples_same_weight_as_input(), called here
+}
+
+TEST_F(GeneratedTests, depthwise_conv2d_per_channel_different) {  // "different" variant; presumably execute() builds the model, runs it and checks it against the examples — confirm in TestGenerated.h
+    execute(depthwise_conv2d_per_channel::CreateModel_different,  // model-building function
+            depthwise_conv2d_per_channel::is_ignored_different,  // ignore predicate
+            depthwise_conv2d_per_channel::get_examples_different());  // value returned by get_examples_different(), called here
+}
+
+TEST_F(GeneratedTests, depthwise_conv2d_per_channel_different_weight_as_input) {  // "different_weight_as_input" variant; presumably execute() builds the model, runs it and checks it against the examples — confirm in TestGenerated.h
+    execute(depthwise_conv2d_per_channel::CreateModel_different_weight_as_input,  // model-building function
+            depthwise_conv2d_per_channel::is_ignored_different_weight_as_input,  // ignore predicate
+            depthwise_conv2d_per_channel::get_examples_different_weight_as_input());  // value returned by get_examples_different_weight_as_input(), called here
+}
+
+TEST_F(GeneratedTests, depthwise_conv2d_per_channel_layout_nhwc) {  // "layout_nhwc" variant; presumably execute() builds the model, runs it and checks it against the examples — confirm in TestGenerated.h
+    execute(depthwise_conv2d_per_channel::CreateModel_layout_nhwc,  // model-building function
+            depthwise_conv2d_per_channel::is_ignored_layout_nhwc,  // ignore predicate
+            depthwise_conv2d_per_channel::get_examples_layout_nhwc());  // value returned by get_examples_layout_nhwc(), called here
+}
+
+TEST_F(GeneratedTests, depthwise_conv2d_per_channel_layout_nhwc_weight_as_input) {  // "layout_nhwc_weight_as_input" variant; presumably execute() builds the model, runs it and checks it against the examples — confirm in TestGenerated.h
+    execute(depthwise_conv2d_per_channel::CreateModel_layout_nhwc_weight_as_input,  // model-building function
+            depthwise_conv2d_per_channel::is_ignored_layout_nhwc_weight_as_input,  // ignore predicate
+            depthwise_conv2d_per_channel::get_examples_layout_nhwc_weight_as_input());  // value returned by get_examples_layout_nhwc_weight_as_input(), called here
+}
+
+TEST_F(GeneratedTests, depthwise_conv2d_per_channel_layout_nchw) {  // "layout_nchw" variant; presumably execute() builds the model, runs it and checks it against the examples — confirm in TestGenerated.h
+    execute(depthwise_conv2d_per_channel::CreateModel_layout_nchw,  // model-building function
+            depthwise_conv2d_per_channel::is_ignored_layout_nchw,  // ignore predicate
+            depthwise_conv2d_per_channel::get_examples_layout_nchw());  // value returned by get_examples_layout_nchw(), called here
+}
+
+TEST_F(GeneratedTests, depthwise_conv2d_per_channel_layout_nchw_weight_as_input) {  // "layout_nchw_weight_as_input" variant; presumably execute() builds the model, runs it and checks it against the examples — confirm in TestGenerated.h
+    execute(depthwise_conv2d_per_channel::CreateModel_layout_nchw_weight_as_input,  // model-building function
+            depthwise_conv2d_per_channel::is_ignored_layout_nchw_weight_as_input,  // ignore predicate
+            depthwise_conv2d_per_channel::get_examples_layout_nchw_weight_as_input());  // value returned by get_examples_layout_nchw_weight_as_input(), called here
+}
+
diff --git a/runtime/test/generated/vts_models/depthwise_conv2d_per_channel.model.cpp b/runtime/test/generated/vts_models/depthwise_conv2d_per_channel.model.cpp
new file mode 100644
index 0000000..877a360
--- /dev/null
+++ b/runtime/test/generated/vts_models/depthwise_conv2d_per_channel.model.cpp
@@ -0,0 +1,1206 @@
+// clang-format off
+// Generated file (from: depthwise_conv2d_per_channel.mod.py). Do not edit
+// Create the model for the "same" variant: one DEPTHWISE_CONV_2D with 11 inputs; operands 1-10 are constants and both per-channel scales of operand 1 are 0.5
+Model createTestModel_same() {
+    Operand::ExtraParams extraParams1;
+    extraParams1.channelQuant(SymmPerChannelQuantParams{.scales={0.5f, 0.5f}, .channelDim=3});  // per-channel quantization parameters, moved into operand 1 below
+    const std::vector<Operand> operands = {
+        {  // operand 0: the only model input (see inputIndexes)
+            .type = OperandType::TENSOR_QUANT8_ASYMM,
+            .dimensions = {1, 2, 2, 2},
+            .numberOfConsumers = 1,
+            .scale = 0.5f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {  // operand 1: per-channel quantized tensor stored as 8 constant bytes at offset 0
+            .type = OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL,
+            .dimensions = {1, 2, 2, 2},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 0, .length = 8},
+            .extraParams = std::move(extraParams1),
+        },
+        {  // operand 2: two int32 values stored as 8 constant bytes at offset 8; its own scale is 0
+            .type = OperandType::TENSOR_INT32,
+            .dimensions = {2},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 8, .length = 8},
+        },
+        {  // operands 3-10: eight INT32 scalars stored back to back, 4 bytes each, from offset 16
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 16, .length = 4},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 20, .length = 4},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 24, .length = 4},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 28, .length = 4},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 32, .length = 4},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 36, .length = 4},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 40, .length = 4},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 44, .length = 4},
+        },
+        {  // operand 11: the model output (see outputIndexes); no consumers
+            .type = OperandType::TENSOR_QUANT8_ASYMM,
+            .dimensions = {1, 1, 1, 2},
+            .numberOfConsumers = 0,
+            .scale = 1.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_OUTPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        }
+    };
+
+    const std::vector<Operation> operations = {
+        {
+            .type = OperationType::DEPTHWISE_CONV_2D,
+            .inputs = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10},  // 11 inputs, no layout flag; presumably input, filter, bias, 4 paddings, 2 strides, depth multiplier, activation — confirm against the operation spec
+            .outputs = {11},
+        }
+    };
+
+    const std::vector<uint32_t> inputIndexes = {0};
+    const std::vector<uint32_t> outputIndexes = {11};
+    std::vector<uint8_t> operandValues = {  // 48 bytes covering the offsets used above: 0-7 operand 1, 8-15 operand 2 (all zero), 16-47 operands 3-10
+      2, 4, 2, 0, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
+    };
+    const std::vector<hidl_memory> pools = {};  // no memory pools are used by this model
+
+    return {
+        .operands = operands,
+        .operations = operations,
+        .inputIndexes = inputIndexes,
+        .outputIndexes = outputIndexes,
+        .operandValues = operandValues,
+        .pools = pools,
+    };
+}
+
+inline bool is_ignored_same(int i) {  // Membership test against the ignore set of the "same" variant.
+  static std::set<int> ignore = {};  // Empty, so the lookup below yields false for every i.
+  return ignore.find(i) != ignore.end();  // presumably i is an output index to skip when comparing results — confirm against the test harness
+}
+
+// Create the model for the "same_weight_as_input" variant: as the "same" variant's operand layout, but operands 1 and 2 are model inputs instead of constants
+Model createTestModel_same_weight_as_input() {
+    Operand::ExtraParams extraParams1;
+    extraParams1.channelQuant(SymmPerChannelQuantParams{.scales={0.5f, 0.5f}, .channelDim=3});  // per-channel quantization parameters, moved into operand 1 below
+    const std::vector<Operand> operands = {
+        {  // operand 0: model input
+            .type = OperandType::TENSOR_QUANT8_ASYMM,
+            .dimensions = {1, 2, 2, 2},
+            .numberOfConsumers = 1,
+            .scale = 0.5f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {  // operand 1: per-channel quantized tensor, supplied as a model input (no constant bytes reserved)
+            .type = OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL,
+            .dimensions = {1, 2, 2, 2},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+            .extraParams = std::move(extraParams1),
+        },
+        {  // operand 2: two-element int32 tensor, supplied as a model input; its own scale is 0
+            .type = OperandType::TENSOR_INT32,
+            .dimensions = {2},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {  // operands 3-10: eight INT32 scalars stored back to back, 4 bytes each, from offset 0
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 0, .length = 4},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 4, .length = 4},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 8, .length = 4},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 12, .length = 4},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 16, .length = 4},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 20, .length = 4},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 24, .length = 4},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 28, .length = 4},
+        },
+        {  // operand 11: the model output (see outputIndexes); no consumers
+            .type = OperandType::TENSOR_QUANT8_ASYMM,
+            .dimensions = {1, 1, 1, 2},
+            .numberOfConsumers = 0,
+            .scale = 1.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_OUTPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        }
+    };
+
+    const std::vector<Operation> operations = {
+        {
+            .type = OperationType::DEPTHWISE_CONV_2D,
+            .inputs = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10},  // 11 inputs, no layout flag; presumably input, filter, bias, 4 paddings, 2 strides, depth multiplier, activation — confirm against the operation spec
+            .outputs = {11},
+        }
+    };
+
+    const std::vector<uint32_t> inputIndexes = {0, 1, 2};  // operands 1 and 2 are model inputs in this variant
+    const std::vector<uint32_t> outputIndexes = {11};
+    std::vector<uint8_t> operandValues = {  // 32 bytes: only the eight scalar operands 3-10 are stored as constants
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
+    };
+    const std::vector<hidl_memory> pools = {};  // no memory pools are used by this model
+
+    return {
+        .operands = operands,
+        .operations = operations,
+        .inputIndexes = inputIndexes,
+        .outputIndexes = outputIndexes,
+        .operandValues = operandValues,
+        .pools = pools,
+    };
+}
+
+inline bool is_ignored_same_weight_as_input(int i) {  // Membership test against the ignore set of the "same_weight_as_input" variant.
+  static std::set<int> ignore = {};  // Empty, so the lookup below yields false for every i.
+  return ignore.find(i) != ignore.end();  // presumably i is an output index to skip when comparing results — confirm against the test harness
+}
+
+// Create the model for the "different" variant: one DEPTHWISE_CONV_2D with 11 inputs; operands 1-10 are constants and operand 1 has distinct per-channel scales {1.0, 0.5}
+Model createTestModel_different() {
+    Operand::ExtraParams extraParams1;
+    extraParams1.channelQuant(SymmPerChannelQuantParams{.scales={1.0f, 0.5f}, .channelDim=3});  // per-channel quantization parameters, moved into operand 1 below
+    const std::vector<Operand> operands = {
+        {  // operand 0: the only model input (see inputIndexes); zero point 128
+            .type = OperandType::TENSOR_QUANT8_ASYMM,
+            .dimensions = {1, 3, 3, 2},
+            .numberOfConsumers = 1,
+            .scale = 0.5f,
+            .zeroPoint = 128,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {  // operand 1: per-channel quantized tensor stored as 8 constant bytes at offset 0
+            .type = OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL,
+            .dimensions = {1, 2, 2, 2},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 0, .length = 8},
+            .extraParams = std::move(extraParams1),
+        },
+        {  // operand 2: two int32 values stored as 8 constant bytes at offset 8; its own scale is 0
+            .type = OperandType::TENSOR_INT32,
+            .dimensions = {2},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 8, .length = 8},
+        },
+        {  // operands 3-10: eight INT32 scalars stored back to back, 4 bytes each, from offset 16
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 16, .length = 4},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 20, .length = 4},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 24, .length = 4},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 28, .length = 4},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 32, .length = 4},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 36, .length = 4},
+        },
+        {  // operand 9: the scalar whose stored bytes are 2, 0, 0, 0 (offset 40) — see the NOTE(review) on operandValues
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 40, .length = 4},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 44, .length = 4},
+        },
+        {  // operand 11: the model output (see outputIndexes); no consumers; zero point 128
+            .type = OperandType::TENSOR_QUANT8_ASYMM,
+            .dimensions = {1, 2, 2, 2},
+            .numberOfConsumers = 0,
+            .scale = 1.0f,
+            .zeroPoint = 128,
+            .lifetime = OperandLifeTime::MODEL_OUTPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        }
+    };
+
+    const std::vector<Operation> operations = {
+        {
+            .type = OperationType::DEPTHWISE_CONV_2D,
+            .inputs = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10},  // 11 inputs, no layout flag; presumably input, filter, bias, 4 paddings, 2 strides, depth multiplier, activation — confirm against the operation spec
+            .outputs = {11},
+        }
+    };
+
+    const std::vector<uint32_t> inputIndexes = {0};
+    const std::vector<uint32_t> outputIndexes = {11};
+    std::vector<uint8_t> operandValues = {  // 48 bytes: 0-7 operand 1 (all 1), 8-15 operand 2, 16-47 operands 3-10. NOTE(review): if operand 9 is the depth multiplier, 2 input channels x 2 would imply 4 output channels while operands 1 and 11 carry 2 — confirm against the .mod.py spec
+      1, 1, 1, 1, 1, 1, 1, 1, 4, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0
+    };
+    const std::vector<hidl_memory> pools = {};  // no memory pools are used by this model
+
+    return {
+        .operands = operands,
+        .operations = operations,
+        .inputIndexes = inputIndexes,
+        .outputIndexes = outputIndexes,
+        .operandValues = operandValues,
+        .pools = pools,
+    };
+}
+
+inline bool is_ignored_different(int i) {  // Membership test against the ignore set of the "different" variant.
+  static std::set<int> ignore = {};  // Empty, so the lookup below yields false for every i.
+  return ignore.find(i) != ignore.end();  // presumably i is an output index to skip when comparing results — confirm against the test harness
+}
+
+// Create the model for the "different_weight_as_input" variant: as the "different" variant's operand layout, but operands 1 and 2 are model inputs instead of constants
+Model createTestModel_different_weight_as_input() {
+    Operand::ExtraParams extraParams1;
+    extraParams1.channelQuant(SymmPerChannelQuantParams{.scales={1.0f, 0.5f}, .channelDim=3});  // per-channel quantization parameters, moved into operand 1 below
+    const std::vector<Operand> operands = {
+        {  // operand 0: model input; zero point 128
+            .type = OperandType::TENSOR_QUANT8_ASYMM,
+            .dimensions = {1, 3, 3, 2},
+            .numberOfConsumers = 1,
+            .scale = 0.5f,
+            .zeroPoint = 128,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {  // operand 1: per-channel quantized tensor, supplied as a model input (no constant bytes reserved)
+            .type = OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL,
+            .dimensions = {1, 2, 2, 2},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+            .extraParams = std::move(extraParams1),
+        },
+        {  // operand 2: two-element int32 tensor, supplied as a model input; its own scale is 0
+            .type = OperandType::TENSOR_INT32,
+            .dimensions = {2},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {  // operands 3-10: eight INT32 scalars stored back to back, 4 bytes each, from offset 0
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 0, .length = 4},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 4, .length = 4},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 8, .length = 4},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 12, .length = 4},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 16, .length = 4},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 20, .length = 4},
+        },
+        {  // operand 9: the scalar whose stored bytes are 2, 0, 0, 0 (offset 24) — see the NOTE(review) on operandValues
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 24, .length = 4},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 28, .length = 4},
+        },
+        {  // operand 11: the model output (see outputIndexes); no consumers; zero point 128
+            .type = OperandType::TENSOR_QUANT8_ASYMM,
+            .dimensions = {1, 2, 2, 2},
+            .numberOfConsumers = 0,
+            .scale = 1.0f,
+            .zeroPoint = 128,
+            .lifetime = OperandLifeTime::MODEL_OUTPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        }
+    };
+
+    const std::vector<Operation> operations = {
+        {
+            .type = OperationType::DEPTHWISE_CONV_2D,
+            .inputs = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10},  // 11 inputs, no layout flag; presumably input, filter, bias, 4 paddings, 2 strides, depth multiplier, activation — confirm against the operation spec
+            .outputs = {11},
+        }
+    };
+
+    const std::vector<uint32_t> inputIndexes = {0, 1, 2};  // operands 1 and 2 are model inputs in this variant
+    const std::vector<uint32_t> outputIndexes = {11};
+    std::vector<uint8_t> operandValues = {  // 32 bytes: only the scalar operands 3-10. NOTE(review): if operand 9 is the depth multiplier, 2 input channels x 2 would imply 4 output channels while operands 1 and 11 carry 2 — confirm against the .mod.py spec
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0
+    };
+    const std::vector<hidl_memory> pools = {};  // no memory pools are used by this model
+
+    return {
+        .operands = operands,
+        .operations = operations,
+        .inputIndexes = inputIndexes,
+        .outputIndexes = outputIndexes,
+        .operandValues = operandValues,
+        .pools = pools,
+    };
+}
+
+inline bool is_ignored_different_weight_as_input(int i) {  // Membership test against the ignore set of the "different_weight_as_input" variant.
+  static std::set<int> ignore = {};  // Empty, so the lookup below yields false for every i.
+  return ignore.find(i) != ignore.end();  // presumably i is an output index to skip when comparing results — confirm against the test harness
+}
+
+// Create the model
+Model createTestModel_layout_nhwc() {
+    Operand::ExtraParams extraParams1;
+    extraParams1.channelQuant(SymmPerChannelQuantParams{.scales={1.0f, 0.5f}, .channelDim=3});
+    const std::vector<Operand> operands = {
+        {
+            .type = OperandType::TENSOR_QUANT8_ASYMM,
+            .dimensions = {1, 3, 3, 2},
+            .numberOfConsumers = 1,
+            .scale = 0.5f,
+            .zeroPoint = 128,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {
+            .type = OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL,
+            .dimensions = {1, 2, 2, 2},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 0, .length = 8},
+            .extraParams = std::move(extraParams1),
+        },
+        {
+            .type = OperandType::TENSOR_INT32,
+            .dimensions = {2},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 8, .length = 8},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 16, .length = 4},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 20, .length = 4},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 24, .length = 4},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 28, .length = 4},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 32, .length = 4},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 36, .length = 4},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 40, .length = 4},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 44, .length = 4},
+        },
+        {
+            .type = OperandType::BOOL,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 48, .length = 1},
+        },
+        {
+            .type = OperandType::TENSOR_QUANT8_ASYMM,
+            .dimensions = {1, 2, 2, 2},
+            .numberOfConsumers = 0,
+            .scale = 1.0f,
+            .zeroPoint = 128,
+            .lifetime = OperandLifeTime::MODEL_OUTPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        }
+    };
+
+    const std::vector<Operation> operations = {
+        {
+            .type = OperationType::DEPTHWISE_CONV_2D,
+            .inputs = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11},
+            .outputs = {12},
+        }
+    };
+
+    const std::vector<uint32_t> inputIndexes = {0};
+    const std::vector<uint32_t> outputIndexes = {12};
+    std::vector<uint8_t> operandValues = {
+      1, 1, 1, 1, 1, 1, 1, 1, 4, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0
+    };
+    const std::vector<hidl_memory> pools = {};
+
+    return {
+        .operands = operands,
+        .operations = operations,
+        .inputIndexes = inputIndexes,
+        .outputIndexes = outputIndexes,
+        .operandValues = operandValues,
+        .pools = pools,
+    };
+}
+
+inline bool is_ignored_layout_nhwc(int i) {
+  static std::set<int> ignore = {};
+  return ignore.find(i) != ignore.end();
+}
+
+// Create the model
+Model createTestModel_layout_nhwc_weight_as_input() {
+    Operand::ExtraParams extraParams1;
+    extraParams1.channelQuant(SymmPerChannelQuantParams{.scales={1.0f, 0.5f}, .channelDim=3});
+    const std::vector<Operand> operands = {
+        {
+            .type = OperandType::TENSOR_QUANT8_ASYMM,
+            .dimensions = {1, 3, 3, 2},
+            .numberOfConsumers = 1,
+            .scale = 0.5f,
+            .zeroPoint = 128,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {
+            .type = OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL,
+            .dimensions = {1, 2, 2, 2},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+            .extraParams = std::move(extraParams1),
+        },
+        {
+            .type = OperandType::TENSOR_INT32,
+            .dimensions = {2},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 0, .length = 4},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 4, .length = 4},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 8, .length = 4},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 12, .length = 4},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 16, .length = 4},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 20, .length = 4},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 24, .length = 4},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 28, .length = 4},
+        },
+        {
+            .type = OperandType::BOOL,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 32, .length = 1},
+        },
+        {
+            .type = OperandType::TENSOR_QUANT8_ASYMM,
+            .dimensions = {1, 2, 2, 2},
+            .numberOfConsumers = 0,
+            .scale = 1.0f,
+            .zeroPoint = 128,
+            .lifetime = OperandLifeTime::MODEL_OUTPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        }
+    };
+
+    const std::vector<Operation> operations = {
+        {
+            .type = OperationType::DEPTHWISE_CONV_2D,
+            .inputs = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11},
+            .outputs = {12},
+        }
+    };
+
+    const std::vector<uint32_t> inputIndexes = {0, 1, 2};
+    const std::vector<uint32_t> outputIndexes = {12};
+    std::vector<uint8_t> operandValues = {
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0
+    };
+    const std::vector<hidl_memory> pools = {};
+
+    return {
+        .operands = operands,
+        .operations = operations,
+        .inputIndexes = inputIndexes,
+        .outputIndexes = outputIndexes,
+        .operandValues = operandValues,
+        .pools = pools,
+    };
+}
+
+inline bool is_ignored_layout_nhwc_weight_as_input(int i) {
+  static std::set<int> ignore = {};
+  return ignore.find(i) != ignore.end();
+}
+
+// Create the model
+Model createTestModel_layout_nchw() {
+    Operand::ExtraParams extraParams1;
+    extraParams1.channelQuant(SymmPerChannelQuantParams{.scales={1.0f, 0.5f}, .channelDim=3});
+    const std::vector<Operand> operands = {
+        {
+            .type = OperandType::TENSOR_QUANT8_ASYMM,
+            .dimensions = {1, 2, 3, 3},
+            .numberOfConsumers = 1,
+            .scale = 0.5f,
+            .zeroPoint = 128,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {
+            .type = OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL,
+            .dimensions = {1, 2, 2, 2},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 0, .length = 8},
+            .extraParams = std::move(extraParams1),
+        },
+        {
+            .type = OperandType::TENSOR_INT32,
+            .dimensions = {2},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 8, .length = 8},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 16, .length = 4},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 20, .length = 4},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 24, .length = 4},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 28, .length = 4},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 32, .length = 4},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 36, .length = 4},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 40, .length = 4},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 44, .length = 4},
+        },
+        {
+            .type = OperandType::BOOL,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 48, .length = 1},
+        },
+        {
+            .type = OperandType::TENSOR_QUANT8_ASYMM,
+            .dimensions = {1, 2, 2, 2},
+            .numberOfConsumers = 0,
+            .scale = 1.0f,
+            .zeroPoint = 128,
+            .lifetime = OperandLifeTime::MODEL_OUTPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        }
+    };
+
+    const std::vector<Operation> operations = {
+        {
+            .type = OperationType::DEPTHWISE_CONV_2D,
+            .inputs = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11},
+            .outputs = {12},
+        }
+    };
+
+    const std::vector<uint32_t> inputIndexes = {0};
+    const std::vector<uint32_t> outputIndexes = {12};
+    std::vector<uint8_t> operandValues = {
+      1, 1, 1, 1, 1, 1, 1, 1, 4, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1
+    };
+    const std::vector<hidl_memory> pools = {};
+
+    return {
+        .operands = operands,
+        .operations = operations,
+        .inputIndexes = inputIndexes,
+        .outputIndexes = outputIndexes,
+        .operandValues = operandValues,
+        .pools = pools,
+    };
+}
+
+inline bool is_ignored_layout_nchw(int i) {
+  static std::set<int> ignore = {};
+  return ignore.find(i) != ignore.end();
+}
+
+// Create the model
+Model createTestModel_layout_nchw_weight_as_input() {
+    Operand::ExtraParams extraParams1;
+    extraParams1.channelQuant(SymmPerChannelQuantParams{.scales={1.0f, 0.5f}, .channelDim=3});
+    const std::vector<Operand> operands = {
+        {
+            .type = OperandType::TENSOR_QUANT8_ASYMM,
+            .dimensions = {1, 2, 3, 3},
+            .numberOfConsumers = 1,
+            .scale = 0.5f,
+            .zeroPoint = 128,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {
+            .type = OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL,
+            .dimensions = {1, 2, 2, 2},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+            .extraParams = std::move(extraParams1),
+        },
+        {
+            .type = OperandType::TENSOR_INT32,
+            .dimensions = {2},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 0, .length = 4},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 4, .length = 4},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 8, .length = 4},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 12, .length = 4},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 16, .length = 4},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 20, .length = 4},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 24, .length = 4},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 28, .length = 4},
+        },
+        {
+            .type = OperandType::BOOL,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 32, .length = 1},
+        },
+        {
+            .type = OperandType::TENSOR_QUANT8_ASYMM,
+            .dimensions = {1, 2, 2, 2},
+            .numberOfConsumers = 0,
+            .scale = 1.0f,
+            .zeroPoint = 128,
+            .lifetime = OperandLifeTime::MODEL_OUTPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        }
+    };
+
+    const std::vector<Operation> operations = {
+        {
+            .type = OperationType::DEPTHWISE_CONV_2D,
+            .inputs = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11},
+            .outputs = {12},
+        }
+    };
+
+    const std::vector<uint32_t> inputIndexes = {0, 1, 2};
+    const std::vector<uint32_t> outputIndexes = {12};
+    std::vector<uint8_t> operandValues = {
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1
+    };
+    const std::vector<hidl_memory> pools = {};
+
+    return {
+        .operands = operands,
+        .operations = operations,
+        .inputIndexes = inputIndexes,
+        .outputIndexes = outputIndexes,
+        .operandValues = operandValues,
+        .pools = pools,
+    };
+}
+
+inline bool is_ignored_layout_nchw_weight_as_input(int i) {
+  static std::set<int> ignore = {};
+  return ignore.find(i) != ignore.end();
+}
+
diff --git a/runtime/test/generated/vts_models/embedding_lookup_sparse.model.cpp b/runtime/test/generated/vts_models/embedding_lookup_sparse.model.cpp
new file mode 100644
index 0000000..9e3f109
--- /dev/null
+++ b/runtime/test/generated/vts_models/embedding_lookup_sparse.model.cpp
@@ -0,0 +1,1182 @@
+// clang-format off
+// Generated file (from: embedding_lookup_sparse.mod.py). Do not edit
+// Create the model
+Model createTestModel() {
+    const std::vector<Operand> operands = {
+        {
+            .type = OperandType::TENSOR_INT32,
+            .dimensions = {3},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {
+            .type = OperandType::TENSOR_INT32,
+            .dimensions = {3, 2},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {
+            .type = OperandType::TENSOR_INT32,
+            .dimensions = {2},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {
+            .type = OperandType::TENSOR_FLOAT32,
+            .dimensions = {3},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 0, .length = 4},
+        },
+        {
+            .type = OperandType::TENSOR_FLOAT32,
+            .dimensions = {4, 3, 2},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {
+            .type = OperandType::TENSOR_FLOAT32,
+            .dimensions = {3, 3, 2},
+            .numberOfConsumers = 0,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_OUTPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        }
+    };
+
+    const std::vector<Operation> operations = {
+        {
+            .type = OperationType::EMBEDDING_LOOKUP_SPARSE,
+            .inputs = {0, 1, 2, 3, 4, 5},
+            .outputs = {6},
+        }
+    };
+
+    const std::vector<uint32_t> inputIndexes = {0, 1, 2, 3, 5};
+    const std::vector<uint32_t> outputIndexes = {6};
+    std::vector<uint8_t> operandValues = {
+      0, 0, 0, 0
+    };
+    const std::vector<hidl_memory> pools = {};
+
+    return {
+        .operands = operands,
+        .operations = operations,
+        .inputIndexes = inputIndexes,
+        .outputIndexes = outputIndexes,
+        .operandValues = operandValues,
+        .pools = pools,
+    };
+}
+
+inline bool is_ignored(int i) {
+  static std::set<int> ignore = {};
+  return ignore.find(i) != ignore.end();
+}
+
+// Create the model
+Model createTestModel_relaxed() {
+    const std::vector<Operand> operands = {
+        {
+            .type = OperandType::TENSOR_INT32,
+            .dimensions = {3},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {
+            .type = OperandType::TENSOR_INT32,
+            .dimensions = {3, 2},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {
+            .type = OperandType::TENSOR_INT32,
+            .dimensions = {2},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {
+            .type = OperandType::TENSOR_FLOAT32,
+            .dimensions = {3},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 0, .length = 4},
+        },
+        {
+            .type = OperandType::TENSOR_FLOAT32,
+            .dimensions = {4, 3, 2},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {
+            .type = OperandType::TENSOR_FLOAT32,
+            .dimensions = {3, 3, 2},
+            .numberOfConsumers = 0,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_OUTPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        }
+    };
+
+    const std::vector<Operation> operations = {
+        {
+            .type = OperationType::EMBEDDING_LOOKUP_SPARSE,
+            .inputs = {0, 1, 2, 3, 4, 5},
+            .outputs = {6},
+        }
+    };
+
+    const std::vector<uint32_t> inputIndexes = {0, 1, 2, 3, 5};
+    const std::vector<uint32_t> outputIndexes = {6};
+    std::vector<uint8_t> operandValues = {
+      0, 0, 0, 0
+    };
+    const std::vector<hidl_memory> pools = {};
+
+    return {
+        .operands = operands,
+        .operations = operations,
+        .inputIndexes = inputIndexes,
+        .outputIndexes = outputIndexes,
+        .operandValues = operandValues,
+        .pools = pools,
+        .relaxComputationFloat32toFloat16 = true,
+    };
+}
+
+inline bool is_ignored_relaxed(int i) {
+  static std::set<int> ignore = {};
+  return ignore.find(i) != ignore.end();
+}
+
+// Create the model
+Model createTestModel_float16() {
+    const std::vector<Operand> operands = {
+        {
+            .type = OperandType::TENSOR_INT32,
+            .dimensions = {3},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {
+            .type = OperandType::TENSOR_INT32,
+            .dimensions = {3, 2},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {
+            .type = OperandType::TENSOR_INT32,
+            .dimensions = {2},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {
+            .type = OperandType::TENSOR_FLOAT16,
+            .dimensions = {3},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 0, .length = 4},
+        },
+        {
+            .type = OperandType::TENSOR_FLOAT16,
+            .dimensions = {4, 3, 2},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {
+            .type = OperandType::TENSOR_FLOAT16,
+            .dimensions = {3, 3, 2},
+            .numberOfConsumers = 0,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_OUTPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        }
+    };
+
+    const std::vector<Operation> operations = {
+        {
+            .type = OperationType::EMBEDDING_LOOKUP_SPARSE,
+            .inputs = {0, 1, 2, 3, 4, 5},
+            .outputs = {6},
+        }
+    };
+
+    const std::vector<uint32_t> inputIndexes = {0, 1, 2, 3, 5};
+    const std::vector<uint32_t> outputIndexes = {6};
+    std::vector<uint8_t> operandValues = {
+      0, 0, 0, 0
+    };
+    const std::vector<hidl_memory> pools = {};
+
+    return {
+        .operands = operands,
+        .operations = operations,
+        .inputIndexes = inputIndexes,
+        .outputIndexes = outputIndexes,
+        .operandValues = operandValues,
+        .pools = pools,
+    };
+}
+
+inline bool is_ignored_float16(int i) {
+  static std::set<int> ignore = {};
+  return ignore.find(i) != ignore.end();
+}
+
+// Create the model
+Model createTestModel_2() {
+    const std::vector<Operand> operands = {
+        {
+            .type = OperandType::TENSOR_INT32,
+            .dimensions = {3},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {
+            .type = OperandType::TENSOR_INT32,
+            .dimensions = {3, 2},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {
+            .type = OperandType::TENSOR_INT32,
+            .dimensions = {2},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {
+            .type = OperandType::TENSOR_FLOAT32,
+            .dimensions = {3},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 0, .length = 4},
+        },
+        {
+            .type = OperandType::TENSOR_FLOAT32,
+            .dimensions = {4, 3, 2},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {
+            .type = OperandType::TENSOR_FLOAT32,
+            .dimensions = {3, 3, 2},
+            .numberOfConsumers = 0,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_OUTPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        }
+    };
+
+    const std::vector<Operation> operations = {
+        {
+            .type = OperationType::EMBEDDING_LOOKUP_SPARSE,
+            .inputs = {0, 1, 2, 3, 4, 5},
+            .outputs = {6},
+        }
+    };
+
+    const std::vector<uint32_t> inputIndexes = {0, 1, 2, 3, 5};
+    const std::vector<uint32_t> outputIndexes = {6};
+    std::vector<uint8_t> operandValues = {
+      1, 0, 0, 0
+    };
+    const std::vector<hidl_memory> pools = {};
+
+    return {
+        .operands = operands,
+        .operations = operations,
+        .inputIndexes = inputIndexes,
+        .outputIndexes = outputIndexes,
+        .operandValues = operandValues,
+        .pools = pools,
+    };
+}
+
+inline bool is_ignored_2(int i) {  // true iff i is a member of the (currently empty) set below
+  static std::set<int> excluded = {};
+  return excluded.count(i) != 0;
+}
+
+// Create the "relaxed_2" model: seven operands and one EMBEDDING_LOOKUP_SPARSE operation.
+Model createTestModel_relaxed_2() {
+    const std::vector<Operand> operands = {
+        {  // operand 0
+            .type = OperandType::TENSOR_INT32,
+            .dimensions = {3},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {  // operand 1
+            .type = OperandType::TENSOR_INT32,
+            .dimensions = {3, 2},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {  // operand 2
+            .type = OperandType::TENSOR_INT32,
+            .dimensions = {2},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {  // operand 3
+            .type = OperandType::TENSOR_FLOAT32,
+            .dimensions = {3},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {  // operand 4: the only CONSTANT_COPY operand; its location spans 4 bytes at offset 0
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 0, .length = 4},
+        },
+        {  // operand 5
+            .type = OperandType::TENSOR_FLOAT32,
+            .dimensions = {4, 3, 2},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {  // operand 6: the MODEL_OUTPUT operand (numberOfConsumers is 0)
+            .type = OperandType::TENSOR_FLOAT32,
+            .dimensions = {3, 3, 2},
+            .numberOfConsumers = 0,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_OUTPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        }
+    };
+
+    const std::vector<Operation> operations = {
+        {
+            .type = OperationType::EMBEDDING_LOOKUP_SPARSE,
+            .inputs = {0, 1, 2, 3, 4, 5},  // presumably positions in `operands` above
+            .outputs = {6},
+        }
+    };
+
+    const std::vector<uint32_t> inputIndexes = {0, 1, 2, 3, 5};  // matches the MODEL_INPUT operands; 4 (constant) and 6 (output) are absent
+    const std::vector<uint32_t> outputIndexes = {6};
+    std::vector<uint8_t> operandValues = {
+      1, 0, 0, 0  // 4 bytes; presumably operand 4's INT32 value (1 if little-endian) - TODO confirm
+    };
+    const std::vector<hidl_memory> pools = {};  // no memory pools are used
+
+    return {
+        .operands = operands,
+        .operations = operations,
+        .inputIndexes = inputIndexes,
+        .outputIndexes = outputIndexes,
+        .operandValues = operandValues,
+        .pools = pools,
+        .relaxComputationFloat32toFloat16 = true,  // createTestModel_2's return omits this field
+    };
+}
+
+inline bool is_ignored_relaxed_2(int i) {  // true iff i is a member of the (currently empty) set below
+  static std::set<int> excluded = {};
+  return excluded.count(i) != 0;
+}
+
+// Create the "float16_2" model: seven operands and one EMBEDDING_LOOKUP_SPARSE operation.
+Model createTestModel_float16_2() {
+    const std::vector<Operand> operands = {
+        {  // operand 0
+            .type = OperandType::TENSOR_INT32,
+            .dimensions = {3},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {  // operand 1
+            .type = OperandType::TENSOR_INT32,
+            .dimensions = {3, 2},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {  // operand 2
+            .type = OperandType::TENSOR_INT32,
+            .dimensions = {2},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {  // operand 3 (float16 tensor in this variant)
+            .type = OperandType::TENSOR_FLOAT16,
+            .dimensions = {3},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {  // operand 4: the only CONSTANT_COPY operand; its location spans 4 bytes at offset 0
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 0, .length = 4},
+        },
+        {  // operand 5 (float16 tensor in this variant)
+            .type = OperandType::TENSOR_FLOAT16,
+            .dimensions = {4, 3, 2},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {  // operand 6: the MODEL_OUTPUT operand (numberOfConsumers is 0)
+            .type = OperandType::TENSOR_FLOAT16,
+            .dimensions = {3, 3, 2},
+            .numberOfConsumers = 0,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_OUTPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        }
+    };
+
+    const std::vector<Operation> operations = {
+        {
+            .type = OperationType::EMBEDDING_LOOKUP_SPARSE,
+            .inputs = {0, 1, 2, 3, 4, 5},  // presumably positions in `operands` above
+            .outputs = {6},
+        }
+    };
+
+    const std::vector<uint32_t> inputIndexes = {0, 1, 2, 3, 5};  // matches the MODEL_INPUT operands; 4 (constant) and 6 (output) are absent
+    const std::vector<uint32_t> outputIndexes = {6};
+    std::vector<uint8_t> operandValues = {
+      1, 0, 0, 0  // 4 bytes; presumably operand 4's INT32 value (1 if little-endian) - TODO confirm
+    };
+    const std::vector<hidl_memory> pools = {};  // no memory pools are used
+
+    return {
+        .operands = operands,
+        .operations = operations,
+        .inputIndexes = inputIndexes,
+        .outputIndexes = outputIndexes,
+        .operandValues = operandValues,
+        .pools = pools,
+    };
+}
+
+inline bool is_ignored_float16_2(int i) {  // true iff i is a member of the (currently empty) set below
+  static std::set<int> excluded = {};
+  return excluded.count(i) != 0;
+}
+
+// Create model "3": seven operands and one EMBEDDING_LOOKUP_SPARSE operation.
+Model createTestModel_3() {
+    const std::vector<Operand> operands = {
+        {  // operand 0
+            .type = OperandType::TENSOR_INT32,
+            .dimensions = {3},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {  // operand 1
+            .type = OperandType::TENSOR_INT32,
+            .dimensions = {3, 2},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {  // operand 2
+            .type = OperandType::TENSOR_INT32,
+            .dimensions = {2},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {  // operand 3
+            .type = OperandType::TENSOR_FLOAT32,
+            .dimensions = {3},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {  // operand 4: the only CONSTANT_COPY operand; its location spans 4 bytes at offset 0
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 0, .length = 4},
+        },
+        {  // operand 5
+            .type = OperandType::TENSOR_FLOAT32,
+            .dimensions = {4, 3, 2},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {  // operand 6: the MODEL_OUTPUT operand (numberOfConsumers is 0)
+            .type = OperandType::TENSOR_FLOAT32,
+            .dimensions = {3, 3, 2},
+            .numberOfConsumers = 0,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_OUTPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        }
+    };
+
+    const std::vector<Operation> operations = {
+        {
+            .type = OperationType::EMBEDDING_LOOKUP_SPARSE,
+            .inputs = {0, 1, 2, 3, 4, 5},  // presumably positions in `operands` above
+            .outputs = {6},
+        }
+    };
+
+    const std::vector<uint32_t> inputIndexes = {0, 1, 2, 3, 5};  // matches the MODEL_INPUT operands; 4 (constant) and 6 (output) are absent
+    const std::vector<uint32_t> outputIndexes = {6};
+    std::vector<uint8_t> operandValues = {
+      2, 0, 0, 0  // 4 bytes; presumably operand 4's INT32 value (2 if little-endian) - TODO confirm
+    };
+    const std::vector<hidl_memory> pools = {};  // no memory pools are used
+
+    return {
+        .operands = operands,
+        .operations = operations,
+        .inputIndexes = inputIndexes,
+        .outputIndexes = outputIndexes,
+        .operandValues = operandValues,
+        .pools = pools,
+    };
+}
+
+inline bool is_ignored_3(int i) {  // true iff i is a member of the (currently empty) set below
+  static std::set<int> excluded = {};
+  return excluded.count(i) != 0;
+}
+
+// Create the "relaxed_3" model: seven operands and one EMBEDDING_LOOKUP_SPARSE operation.
+Model createTestModel_relaxed_3() {
+    const std::vector<Operand> operands = {
+        {  // operand 0
+            .type = OperandType::TENSOR_INT32,
+            .dimensions = {3},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {  // operand 1
+            .type = OperandType::TENSOR_INT32,
+            .dimensions = {3, 2},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {  // operand 2
+            .type = OperandType::TENSOR_INT32,
+            .dimensions = {2},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {  // operand 3
+            .type = OperandType::TENSOR_FLOAT32,
+            .dimensions = {3},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {  // operand 4: the only CONSTANT_COPY operand; its location spans 4 bytes at offset 0
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 0, .length = 4},
+        },
+        {  // operand 5
+            .type = OperandType::TENSOR_FLOAT32,
+            .dimensions = {4, 3, 2},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {  // operand 6: the MODEL_OUTPUT operand (numberOfConsumers is 0)
+            .type = OperandType::TENSOR_FLOAT32,
+            .dimensions = {3, 3, 2},
+            .numberOfConsumers = 0,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_OUTPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        }
+    };
+
+    const std::vector<Operation> operations = {
+        {
+            .type = OperationType::EMBEDDING_LOOKUP_SPARSE,
+            .inputs = {0, 1, 2, 3, 4, 5},  // presumably positions in `operands` above
+            .outputs = {6},
+        }
+    };
+
+    const std::vector<uint32_t> inputIndexes = {0, 1, 2, 3, 5};  // matches the MODEL_INPUT operands; 4 (constant) and 6 (output) are absent
+    const std::vector<uint32_t> outputIndexes = {6};
+    std::vector<uint8_t> operandValues = {
+      2, 0, 0, 0  // 4 bytes; presumably operand 4's INT32 value (2 if little-endian) - TODO confirm
+    };
+    const std::vector<hidl_memory> pools = {};  // no memory pools are used
+
+    return {
+        .operands = operands,
+        .operations = operations,
+        .inputIndexes = inputIndexes,
+        .outputIndexes = outputIndexes,
+        .operandValues = operandValues,
+        .pools = pools,
+        .relaxComputationFloat32toFloat16 = true,  // createTestModel_3's return omits this field
+    };
+}
+
+inline bool is_ignored_relaxed_3(int i) {  // true iff i is a member of the (currently empty) set below
+  static std::set<int> excluded = {};
+  return excluded.count(i) != 0;
+}
+
+// Create the "float16_3" model: seven operands and one EMBEDDING_LOOKUP_SPARSE operation.
+Model createTestModel_float16_3() {
+    const std::vector<Operand> operands = {
+        {  // operand 0
+            .type = OperandType::TENSOR_INT32,
+            .dimensions = {3},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {  // operand 1
+            .type = OperandType::TENSOR_INT32,
+            .dimensions = {3, 2},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {  // operand 2
+            .type = OperandType::TENSOR_INT32,
+            .dimensions = {2},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {  // operand 3 (float16 tensor in this variant)
+            .type = OperandType::TENSOR_FLOAT16,
+            .dimensions = {3},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {  // operand 4: the only CONSTANT_COPY operand; its location spans 4 bytes at offset 0
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 0, .length = 4},
+        },
+        {  // operand 5 (float16 tensor in this variant)
+            .type = OperandType::TENSOR_FLOAT16,
+            .dimensions = {4, 3, 2},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {  // operand 6: the MODEL_OUTPUT operand (numberOfConsumers is 0)
+            .type = OperandType::TENSOR_FLOAT16,
+            .dimensions = {3, 3, 2},
+            .numberOfConsumers = 0,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_OUTPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        }
+    };
+
+    const std::vector<Operation> operations = {
+        {
+            .type = OperationType::EMBEDDING_LOOKUP_SPARSE,
+            .inputs = {0, 1, 2, 3, 4, 5},  // presumably positions in `operands` above
+            .outputs = {6},
+        }
+    };
+
+    const std::vector<uint32_t> inputIndexes = {0, 1, 2, 3, 5};  // matches the MODEL_INPUT operands; 4 (constant) and 6 (output) are absent
+    const std::vector<uint32_t> outputIndexes = {6};
+    std::vector<uint8_t> operandValues = {
+      2, 0, 0, 0  // 4 bytes; presumably operand 4's INT32 value (2 if little-endian) - TODO confirm
+    };
+    const std::vector<hidl_memory> pools = {};  // no memory pools are used
+
+    return {
+        .operands = operands,
+        .operations = operations,
+        .inputIndexes = inputIndexes,
+        .outputIndexes = outputIndexes,
+        .operandValues = operandValues,
+        .pools = pools,
+    };
+}
+
+inline bool is_ignored_float16_3(int i) {  // true iff i is a member of the (currently empty) set below
+  static std::set<int> excluded = {};
+  return excluded.count(i) != 0;
+}
+
+// Create model "4": seven operands and one EMBEDDING_LOOKUP_SPARSE operation.
+Model createTestModel_4() {
+    const std::vector<Operand> operands = {
+        {  // operand 0
+            .type = OperandType::TENSOR_INT32,
+            .dimensions = {3},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {  // operand 1
+            .type = OperandType::TENSOR_INT32,
+            .dimensions = {3, 3},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {  // operand 2
+            .type = OperandType::TENSOR_INT32,
+            .dimensions = {3},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {  // operand 3
+            .type = OperandType::TENSOR_FLOAT32,
+            .dimensions = {3},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {  // operand 4: the only CONSTANT_COPY operand; its location spans 4 bytes at offset 0
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 0, .length = 4},
+        },
+        {  // operand 5
+            .type = OperandType::TENSOR_FLOAT32,
+            .dimensions = {4, 3, 2},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {  // operand 6: the MODEL_OUTPUT operand (numberOfConsumers is 0)
+            .type = OperandType::TENSOR_FLOAT32,
+            .dimensions = {3, 2, 3, 2},
+            .numberOfConsumers = 0,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_OUTPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        }
+    };
+
+    const std::vector<Operation> operations = {
+        {
+            .type = OperationType::EMBEDDING_LOOKUP_SPARSE,
+            .inputs = {0, 1, 2, 3, 4, 5},  // presumably positions in `operands` above
+            .outputs = {6},
+        }
+    };
+
+    const std::vector<uint32_t> inputIndexes = {0, 1, 2, 3, 5};  // matches the MODEL_INPUT operands; 4 (constant) and 6 (output) are absent
+    const std::vector<uint32_t> outputIndexes = {6};
+    std::vector<uint8_t> operandValues = {
+      0, 0, 0, 0  // 4 bytes, all zero; presumably operand 4's INT32 value - TODO confirm
+    };
+    const std::vector<hidl_memory> pools = {};  // no memory pools are used
+
+    return {
+        .operands = operands,
+        .operations = operations,
+        .inputIndexes = inputIndexes,
+        .outputIndexes = outputIndexes,
+        .operandValues = operandValues,
+        .pools = pools,
+    };
+}
+
+inline bool is_ignored_4(int i) {  // true iff i is a member of the (currently empty) set below
+  static std::set<int> excluded = {};
+  return excluded.count(i) != 0;
+}
+
+// Create the "relaxed_4" model: seven operands and one EMBEDDING_LOOKUP_SPARSE operation.
+Model createTestModel_relaxed_4() {
+    const std::vector<Operand> operands = {
+        {  // operand 0
+            .type = OperandType::TENSOR_INT32,
+            .dimensions = {3},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {  // operand 1
+            .type = OperandType::TENSOR_INT32,
+            .dimensions = {3, 3},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {  // operand 2
+            .type = OperandType::TENSOR_INT32,
+            .dimensions = {3},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {  // operand 3
+            .type = OperandType::TENSOR_FLOAT32,
+            .dimensions = {3},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {  // operand 4: the only CONSTANT_COPY operand; its location spans 4 bytes at offset 0
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 0, .length = 4},
+        },
+        {  // operand 5
+            .type = OperandType::TENSOR_FLOAT32,
+            .dimensions = {4, 3, 2},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {  // operand 6: the MODEL_OUTPUT operand (numberOfConsumers is 0)
+            .type = OperandType::TENSOR_FLOAT32,
+            .dimensions = {3, 2, 3, 2},
+            .numberOfConsumers = 0,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_OUTPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        }
+    };
+
+    const std::vector<Operation> operations = {
+        {
+            .type = OperationType::EMBEDDING_LOOKUP_SPARSE,
+            .inputs = {0, 1, 2, 3, 4, 5},  // presumably positions in `operands` above
+            .outputs = {6},
+        }
+    };
+
+    const std::vector<uint32_t> inputIndexes = {0, 1, 2, 3, 5};  // matches the MODEL_INPUT operands; 4 (constant) and 6 (output) are absent
+    const std::vector<uint32_t> outputIndexes = {6};
+    std::vector<uint8_t> operandValues = {
+      0, 0, 0, 0  // 4 bytes, all zero; presumably operand 4's INT32 value - TODO confirm
+    };
+    const std::vector<hidl_memory> pools = {};  // no memory pools are used
+
+    return {
+        .operands = operands,
+        .operations = operations,
+        .inputIndexes = inputIndexes,
+        .outputIndexes = outputIndexes,
+        .operandValues = operandValues,
+        .pools = pools,
+        .relaxComputationFloat32toFloat16 = true,  // createTestModel_4's return omits this field
+    };
+}
+
+inline bool is_ignored_relaxed_4(int i) {  // true iff i is a member of the (currently empty) set below
+  static std::set<int> excluded = {};
+  return excluded.count(i) != 0;
+}
+
+// Create the "float16_4" model: seven operands and one EMBEDDING_LOOKUP_SPARSE operation.
+Model createTestModel_float16_4() {
+    const std::vector<Operand> operands = {
+        {  // operand 0
+            .type = OperandType::TENSOR_INT32,
+            .dimensions = {3},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {  // operand 1
+            .type = OperandType::TENSOR_INT32,
+            .dimensions = {3, 3},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {  // operand 2
+            .type = OperandType::TENSOR_INT32,
+            .dimensions = {3},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {  // operand 3 (float16 tensor in this variant)
+            .type = OperandType::TENSOR_FLOAT16,
+            .dimensions = {3},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {  // operand 4: the only CONSTANT_COPY operand; its location spans 4 bytes at offset 0
+            .type = OperandType::INT32,
+            .dimensions = {},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::CONSTANT_COPY,
+            .location = {.poolIndex = 0, .offset = 0, .length = 4},
+        },
+        {  // operand 5 (float16 tensor in this variant)
+            .type = OperandType::TENSOR_FLOAT16,
+            .dimensions = {4, 3, 2},
+            .numberOfConsumers = 1,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_INPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        },
+        {  // operand 6: the MODEL_OUTPUT operand (numberOfConsumers is 0)
+            .type = OperandType::TENSOR_FLOAT16,
+            .dimensions = {3, 2, 3, 2},
+            .numberOfConsumers = 0,
+            .scale = 0.0f,
+            .zeroPoint = 0,
+            .lifetime = OperandLifeTime::MODEL_OUTPUT,
+            .location = {.poolIndex = 0, .offset = 0, .length = 0},
+        }
+    };
+
+    const std::vector<Operation> operations = {
+        {
+            .type = OperationType::EMBEDDING_LOOKUP_SPARSE,
+            .inputs = {0, 1, 2, 3, 4, 5},  // presumably positions in `operands` above
+            .outputs = {6},
+        }
+    };
+
+    const std::vector<uint32_t> inputIndexes = {0, 1, 2, 3, 5};  // matches the MODEL_INPUT operands; 4 (constant) and 6 (output) are absent
+    const std::vector<uint32_t> outputIndexes = {6};
+    std::vector<uint8_t> operandValues = {
+      0, 0, 0, 0  // 4 bytes, all zero; presumably operand 4's INT32 value - TODO confirm
+    };
+    const std::vector<hidl_memory> pools = {};  // no memory pools are used
+
+    return {
+        .operands = operands,
+        .operations = operations,
+        .inputIndexes = inputIndexes,
+        .outputIndexes = outputIndexes,
+        .operandValues = operandValues,
+        .pools = pools,
+    };
+}
+
+inline bool is_ignored_float16_4(int i) {  // true iff i is a member of the (currently empty) set below
+  static std::set<int> excluded = {};
+  return excluded.count(i) != 0;
+}
+
diff --git a/runtime/test/specs/V1_2/depthwise_conv2d_per_channel.mod.py b/runtime/test/specs/V1_2/depthwise_conv2d_per_channel.mod.py
new file mode 100644
index 0000000..152a808
--- /dev/null
+++ b/runtime/test/specs/V1_2/depthwise_conv2d_per_channel.mod.py
@@ -0,0 +1,63 @@
+#
+# Copyright (C) 2018 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# TEST 1: both filter channels use the same scale (0.5); input and output zeroPoint = 0
+i1 = Input("op1", "TENSOR_QUANT8_ASYMM", "{1, 2, 2, 2}, 0.5f, 0")
+f1 = Parameter("op2", "TENSOR_QUANT8_SYMM_PER_CHANNEL", "{1, 2, 2, 2}, 0.0f, 0",  # tensor-level scale stays 0
+               [2, 4,  2, 0,  2, 2,  2, 0],
+               extraParams = SymmPerChannelQuantParams(channelDim=3, scales=[0.5, 0.5]))  # one scale per entry of dim 3
+b1 = Parameter("op3", "TENSOR_INT32", "{2}", [0, 0])  # no scale given; bias[d].scale is implied as input scale * filter scales[d]
+o1 = Output("op4", "TENSOR_QUANT8_ASYMM", "{1, 1, 1, 2}, 1.f, 0")
+Model("same").Operation("DEPTHWISE_CONV_2D", i1, f1, b1, 0, 0, 0, 0, 1, 1, 1, 0).To(o1)  # scalars: presumably pad l/r/t/b, stride w/h, depth_multiplier, activation - TODO confirm
+
+# Example data for TEST 1; f1 and b1 are also handed to AddInput (presumably so they are exercised as model inputs too)
+Example({
+    i1: [4, 16, 4, 32, 4, 64, 4, 128],
+    o1: [8, 48],
+}).AddInput(f1, b1)
+
+
+# TEST 2: per-channel filter scales differ (1.0 vs 0.5); input and output zeroPoint = 128
+i2 = Input("op1", "TENSOR_QUANT8_ASYMM", "{1, 3, 3, 2}, 0.5f, 128")
+f2 = Parameter("op2", "TENSOR_QUANT8_SYMM_PER_CHANNEL", "{1, 2, 2, 2}, 0.0f, 0",  # tensor-level scale stays 0
+               [1, 1, 1, 1, 1, 1, 1, 1],
+               extraParams = SymmPerChannelQuantParams(channelDim=3, scales=[1.0, 0.5]))
+b2 = Parameter("op3", "TENSOR_INT32", "{2}", [4, 4])
+o2 = Output("op4", "TENSOR_QUANT8_ASYMM", "{1, 2, 2, 2}, 1.f, 128")
+Model("different").Operation("DEPTHWISE_CONV_2D", i2, f2, b2, 0, 0, 0, 0, 1, 1, 2, 0).To(o2)  # NOTE(review): 7th scalar is 2 (depth_multiplier?) yet input, filter and output all declare 2 channels - confirm
+
+# Example data for TEST 2. NOTE(review): the input is spatially uniform but the expected output is not - verify against a reference run
+Example({
+    i2: [129, 130] * 9,
+    o2: [132, 130, 128, 128, 132, 130, 129, 128],
+}).AddInput(f2, b2)
+
+
+layout = BoolScalar("layout", False) # NHWC
+
+# TEST 3: same tensors and expected values as TEST 2, with the extra trailing layout operand
+i3 = Input("op1", "TENSOR_QUANT8_ASYMM", "{1, 3, 3, 2}, 0.5f, 128")
+f3 = Parameter("op2", "TENSOR_QUANT8_SYMM_PER_CHANNEL", "{1, 2, 2, 2}, 0.0f, 0",  # tensor-level scale stays 0
+               [1, 1, 1, 1, 1, 1, 1, 1],
+               extraParams = SymmPerChannelQuantParams(channelDim=3, scales=[1.0, 0.5]))
+b3 = Parameter("op3", "TENSOR_INT32", "{2}", [4, 4])
+o3 = Output("op4", "TENSOR_QUANT8_ASYMM", "{1, 2, 2, 2}, 1.f, 128")
+Model("layout").Operation("DEPTHWISE_CONV_2D", i3, f3, b3, 0, 0, 0, 0, 1, 1, 2, 0, layout).To(o3)  # NOTE(review): same depth_multiplier/channel question as TEST 2
+
+# Example data for TEST 3; AddNchw presumably also derives an NCHW variant of i3/o3 keyed on layout - TODO confirm
+Example({
+    i3: [129, 130] * 9,
+    o3: [132, 130, 128, 128, 132, 130, 129, 128],
+}).AddNchw(i3, o3, layout).AddInput(f3, b3)