Add CMSIS-NN specialization for the int8 conv op.
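
The int8 (per-channel quantized) conv kernel now calls into CMSIS-NN:
1x1 convolutions with unit strides, no padding, input depth a multiple
of 4, and output depth a multiple of 2 are routed to
arm_convolve_1x1_s8_fast; all other shapes use arm_convolve_s8. Both
paths draw scratch memory from the shared CMSIS-NN scratch buffer.
uint8 and float inputs still go through the reference kernels.

The conv tests now take a TfLiteConvParams argument so individual tests
can vary stride and padding, and a 1x1 per-channel quantized test is
added. kMaxChannels in the reference kernel is raised from 64 to 256.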

Change-Id: I0b15db09fc168d8d9abee9989c0f50e1f2cd21fd
diff --git a/tensorflow/lite/experimental/micro/kernels/cmsis-nn/conv.cc b/tensorflow/lite/experimental/micro/kernels/cmsis-nn/conv.cc
new file mode 100644
index 0000000..9203ce8
--- /dev/null
+++ b/tensorflow/lite/experimental/micro/kernels/cmsis-nn/conv.cc
@@ -0,0 +1,353 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "arm_nnfunctions.h"
+#include "tensorflow/lite/c/builtin_op_data.h"
+#include "tensorflow/lite/c/c_api_internal.h"
+#include "tensorflow/lite/kernels/internal/common.h"
+#include "tensorflow/lite/kernels/internal/quantization_util.h"
+#include "tensorflow/lite/kernels/internal/reference/conv.h"
+#include "tensorflow/lite/kernels/internal/reference/integer_ops/conv.h"
+#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
+#include "tensorflow/lite/kernels/kernel_util.h"
+#include "tensorflow/lite/kernels/padding.h"
+#include "tensorflow/lite/experimental/micro/kernels/cmsis-nn/scratch_buffer.h"
+
+namespace tflite {
+namespace ops {
+namespace micro {
+namespace conv {
+
+constexpr int kInputTensor = 0;
+constexpr int kFilterTensor = 1;
+constexpr int kBiasTensor = 2;
+constexpr int kOutputTensor = 0;
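+// Upper bound on the number of output channels: the per-channel quantization
+// parameters in OpData below are statically sized to this.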
+constexpr int kMaxChannels = 64;
+
+const int kTensorNotAllocated = -1;
+
+struct OpData {
+  TfLitePaddingValues padding;
+  // The scaling factor from input to output (aka the 'real multiplier') can
+  // be represented as a fixed point multiplier plus a left shift.
+  int32_t output_multiplier;
+  int output_shift;
+
+  // Per channel output multiplier and shift.
+  // TODO(b/141139247): Allocate these dynamically when possible.
+  int32_t per_channel_output_multiplier[kMaxChannels];
+  int32_t per_channel_output_shift[kMaxChannels];
+
+  // The range of the fused activation layer. For example for kNone and
+  // uint8_t these would be 0 and 255.
+  int32_t output_activation_min;
+  int32_t output_activation_max;
+};
+
+inline PaddingType RuntimePaddingType(TfLitePadding padding) {
+  switch (padding) {
+    case TfLitePadding::kTfLitePaddingSame:
+      return PaddingType::kSame;
+    case TfLitePadding::kTfLitePaddingValid:
+      return PaddingType::kValid;
+    case TfLitePadding::kTfLitePaddingUnknown:
+    default:
+      return PaddingType::kNone;
+  }
+}
+
+TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
+                             TfLiteConvParams* params, int width, int height,
+                             int filter_width, int filter_height, int out_width,
+                             int out_height, const TfLiteType data_type,
+                             OpData* data) {
+  bool has_bias = node->inputs->size == 3;
+  // Check number of inputs/outputs
+  TF_LITE_ENSURE(context, has_bias || node->inputs->size == 2);
+  TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);
+
+  // Matching GetWindowedOutputSize in TensorFlow.
+  auto padding = params->padding;
+  data->padding = ComputePaddingHeightWidth(
+      params->stride_height, params->stride_width,
+      params->dilation_height_factor, params->dilation_width_factor, height,
+      width, filter_height, filter_width, padding, &out_height, &out_width);
+
+  // Note that quantized inference requires that all tensors have their
+  // parameters set. This is usually done during quantized training.
+  if (data_type != kTfLiteFloat32) {
+    const TfLiteTensor* input = GetInput(context, node, kInputTensor);
+    const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
+    const TfLiteTensor* bias =
+        GetOptionalInputTensor(context, node, kBiasTensor);
+    TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+
+    TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams(
+        context, input, filter, bias, output, params->activation,
+        &data->output_multiplier, &data->output_shift,
+        &data->output_activation_min, &data->output_activation_max,
+        data->per_channel_output_multiplier,
+        reinterpret_cast<int*>(data->per_channel_output_shift)));
+  }
+  return kTfLiteOk;
+}
+
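+// All per-invocation state (padding and quantization parameters) is computed
+// in Eval via CalculateOpData, so Init and Prepare have nothing to do.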
+void* Init(TfLiteContext* context, const char* buffer, size_t length) {
+  return nullptr;
+}
+
+void Free(TfLiteContext* context, void* buffer) {}
+
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  return kTfLiteOk;
+}
+
+TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
+                           TfLiteConvParams* params, OpData* data,
+                           const TfLiteTensor* input,
+                           const TfLiteTensor* filter, const TfLiteTensor* bias,
+                           TfLiteTensor* im2col, TfLiteTensor* hwcn_weights,
+                           TfLiteTensor* output) {
+  const int32_t input_offset = -input->params.zero_point;
+  const int32_t filter_offset = -filter->params.zero_point;
+  const int32_t output_offset = output->params.zero_point;
+
+  ConvParams op_params;
+  op_params.padding_type = RuntimePaddingType(params->padding);
+  op_params.padding_values.width = data->padding.width;
+  op_params.padding_values.height = data->padding.height;
+  op_params.stride_width = params->stride_width;
+  op_params.stride_height = params->stride_height;
+  op_params.dilation_width_factor = params->dilation_width_factor;
+  op_params.dilation_height_factor = params->dilation_height_factor;
+  op_params.input_offset = input_offset;
+  op_params.weights_offset = filter_offset;
+  op_params.output_offset = output_offset;
+  op_params.output_multiplier = data->output_multiplier;
+  op_params.output_shift = -data->output_shift;
+  op_params.quantized_activation_min = data->output_activation_min;
+  op_params.quantized_activation_max = data->output_activation_max;
+  reference_ops::Conv(op_params, GetTensorShape(input),
+                      GetTensorData<uint8_t>(input), GetTensorShape(filter),
+                      GetTensorData<uint8_t>(filter), GetTensorShape(bias),
+                      GetTensorData<int32_t>(bias), GetTensorShape(output),
+                      GetTensorData<uint8_t>(output), GetTensorShape(im2col),
+                      GetTensorData<uint8_t>(im2col), nullptr);
+  return kTfLiteOk;
+}
+
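+// int8 per-channel quantized convolution, dispatched to the CMSIS-NN s8
+// kernels. Requires ARM_MATH_DSP and ARM_MATH_LOOPUNROLL (see #error below).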
+TfLiteStatus EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
+                                     TfLiteConvParams* params, OpData* data,
+                                     const TfLiteTensor* input,
+                                     const TfLiteTensor* filter,
+                                     const TfLiteTensor* bias,
+                                     TfLiteTensor* output,
+                                     TfLiteTensor* im2col) {
+#if defined(ARM_MATH_DSP) && defined(ARM_MATH_LOOPUNROLL)
+  ConvParams op_params;
+  op_params.input_offset = -input->params.zero_point;
+  op_params.output_offset = output->params.zero_point;
+  op_params.stride_height = params->stride_height;
+  op_params.stride_width = params->stride_width;
+  op_params.dilation_height_factor = params->dilation_height_factor;
+  op_params.dilation_width_factor = params->dilation_width_factor;
+  op_params.padding_values.height = data->padding.height;
+  op_params.padding_values.width = data->padding.width;
+
+  RuntimeShape filter_shape = GetTensorShape(filter);
+  RuntimeShape input_shape = GetTensorShape(input);
+  RuntimeShape output_shape = GetTensorShape(output);
+  RuntimeShape bias_shape = GetTensorShape(bias);
+
+  // TODO(b/130439627): Use calculated value for clamping.
+  const int32 output_activation_min = std::numeric_limits<int8_t>::min();
+  const int32 output_activation_max = std::numeric_limits<int8_t>::max();
+
+  // Sanity check.
+  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
+  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
+  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+  const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
+  const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
+  if (GetTensorData<int32>(bias)) {
+    TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth);
+  }
+
+  const int input_height = input_shape.Dims(1);
+  const int input_width = input_shape.Dims(2);
+  const int filter_height = filter_shape.Dims(1);
+  const int filter_width = filter_shape.Dims(2);
+  const int output_height = output_shape.Dims(1);
+  const int output_width = output_shape.Dims(2);
+  int16_t* buf = nullptr;
+
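+  // The CMSIS-NN 1x1 fast path applies only to unpadded, unit-stride 1x1
+  // convolutions whose input depth is a multiple of 4 and whose output depth
+  // is a multiple of 2; everything else uses the generic s8 kernel.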
+  if (op_params.padding_values.width == 0 &&
+      op_params.padding_values.height == 0 &&
+      (input_depth % 4 == 0) &&
+      (output_depth % 2 == 0) &&
+      op_params.stride_width == 1 &&
+      op_params.stride_height == 1 &&
+      filter_width == 1 &&
+      filter_height == 1) {
+    const int32_t buf_size =
+        arm_convolve_1x1_s8_fast_get_buffer_size(input_depth);
+    if (get_cmsis_scratch_buffer(context, &buf, buf_size) != kTfLiteOk) {
+      return kTfLiteError;
+    }
+    if (arm_convolve_1x1_s8_fast(GetTensorData<int8_t>(input), input_width,
+                                 input_height, input_depth, batches,
+                                 GetTensorData<int8_t>(filter), output_depth,
+                                 op_params.padding_values.width,
+                                 op_params.padding_values.height,
+                                 op_params.stride_width,
+                                 op_params.stride_height,
+                                 GetTensorData<int32>(bias),
+                                 GetTensorData<int8_t>(output),
+                                 data->per_channel_output_shift,
+                                 data->per_channel_output_multiplier,
+                                 op_params.output_offset,
+                                 op_params.input_offset,
+                                 output_activation_min, output_activation_max,
+                                 output_width, output_height,
+                                 buf) != ARM_MATH_SUCCESS) {
+      return kTfLiteError;
+    }
+  } else {
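+    // General case: fall back to the generic CMSIS-NN s8 convolution.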
+    const int32_t buf_size = arm_convolve_s8_get_buffer_size(
+        input_depth, filter_width, filter_height);
+    if (get_cmsis_scratch_buffer(context, &buf, buf_size) != kTfLiteOk) {
+      return kTfLiteError;
+    }
+    if (arm_convolve_s8(GetTensorData<int8_t>(input),
+                        input_width, input_height, input_depth, batches,
+                        GetTensorData<int8_t>(filter),
+                        output_depth,
+                        filter_width, filter_height,
+                        op_params.padding_values.width,
+                        op_params.padding_values.height,
+                        op_params.stride_width, op_params.stride_height,
+                        GetTensorData<int32>(bias),
+                        GetTensorData<int8_t>(output),
+                        data->per_channel_output_shift,
+                        data->per_channel_output_multiplier,
+                        op_params.output_offset,
+                        op_params.input_offset,
+                        output_activation_min, output_activation_max,
+                        output_width, output_height,
+                        buf) != ARM_MATH_SUCCESS) {
+      return kTfLiteError;
+    }
+  }
+#else
+  #error ARM_MATH_DSP and ARM_MATH_LOOPUNROLL must be defined
+#endif
+  return kTfLiteOk;
+}
+
+TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,
+                       TfLiteConvParams* params, OpData* data,
+                       const TfLiteTensor* input, const TfLiteTensor* filter,
+                       const TfLiteTensor* bias, TfLiteTensor* im2col,
+                       TfLiteTensor* hwcn_weights, TfLiteTensor* output) {
+  float output_activation_min, output_activation_max;
+  CalculateActivationRange(params->activation, &output_activation_min,
+                           &output_activation_max);
+
+  ConvParams op_params;
+  op_params.padding_type = RuntimePaddingType(params->padding);
+  op_params.padding_values.width = data->padding.width;
+  op_params.padding_values.height = data->padding.height;
+  op_params.stride_width = params->stride_width;
+  op_params.stride_height = params->stride_height;
+  op_params.dilation_width_factor = params->dilation_width_factor;
+  op_params.dilation_height_factor = params->dilation_height_factor;
+  op_params.float_activation_min = output_activation_min;
+  op_params.float_activation_max = output_activation_max;
+
+  reference_ops::Conv(op_params, GetTensorShape(input),
+                      GetTensorData<float>(input), GetTensorShape(filter),
+                      GetTensorData<float>(filter), GetTensorShape(bias),
+                      GetTensorData<float>(bias), GetTensorShape(output),
+                      GetTensorData<float>(output), GetTensorShape(im2col),
+                      GetTensorData<float>(im2col));
+  return kTfLiteOk;
+}
+
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  auto* params = reinterpret_cast<TfLiteConvParams*>(node->builtin_data);
+
+  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
+  const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
+  const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
+
+  int input_width = input->dims->data[2];
+  int input_height = input->dims->data[1];
+  int filter_width = filter->dims->data[2];
+  int filter_height = filter->dims->data[1];
+  int output_width = output->dims->data[2];
+  int output_height = output->dims->data[1];
+
+  OpData data;
+  if (input->type != kTfLiteFloat32) {
+    TF_LITE_ENSURE_EQ(context, filter->quantization.type,
+                      kTfLiteAffineQuantization);
+
+    const auto* affine_quantization =
+        reinterpret_cast<TfLiteAffineQuantization*>(
+            filter->quantization.params);
+    TF_LITE_ENSURE(context, affine_quantization);
+    TF_LITE_ENSURE(context, affine_quantization->scale);
+  }
+
+  TF_LITE_ENSURE_STATUS(CalculateOpData(
+      context, node, params, input_width, input_height, filter_width,
+      filter_height, output_width, output_height, input->type, &data));
+
+  switch (input->type) {  // Already know in/out types are same.
+    case kTfLiteFloat32:
+      return EvalFloat(context, node, params, &data, input, filter, bias,
+                       nullptr, nullptr, output);
+      break;
+    case kTfLiteInt8:
+      return EvalQuantizedPerChannel(context, node, params, &data, input,
+                                     filter, bias, output, nullptr);
+      break;
+    case kTfLiteUInt8:
+      return EvalQuantized(context, node, params, &data, input, filter, bias,
+                           nullptr, nullptr, output);
+      break;
+    default:
+      context->ReportError(context, "Type %s (%d) not supported.",
+                           TfLiteTypeGetName(input->type), input->type);
+      return kTfLiteError;
+  }
+  return kTfLiteOk;
+}
+
+}  // namespace conv
+
+TfLiteRegistration* Register_CONV_2D() {
+  static TfLiteRegistration r = {conv::Init, conv::Free, conv::Prepare,
+                                 conv::Eval};
+  return &r;
+}
+
+}  // namespace micro
+}  // namespace ops
+}  // namespace tflite
diff --git a/tensorflow/lite/experimental/micro/kernels/conv.cc b/tensorflow/lite/experimental/micro/kernels/conv.cc
index afd8445..c66ebe6 100644
--- a/tensorflow/lite/experimental/micro/kernels/conv.cc
+++ b/tensorflow/lite/experimental/micro/kernels/conv.cc
@@ -33,7 +33,7 @@
 constexpr int kFilterTensor = 1;
 constexpr int kBiasTensor = 2;
 constexpr int kOutputTensor = 0;
-constexpr int kMaxChannels = 64;
+constexpr int kMaxChannels = 256;
 
 // This file has 2 implementation of Conv.
 
diff --git a/tensorflow/lite/experimental/micro/kernels/conv_test.cc b/tensorflow/lite/experimental/micro/kernels/conv_test.cc
index 80e6336..b8e7552 100644
--- a/tensorflow/lite/experimental/micro/kernels/conv_test.cc
+++ b/tensorflow/lite/experimental/micro/kernels/conv_test.cc
@@ -39,12 +39,20 @@
 static const int kOutputShape[] = {4, 2, 1, 2, 3};
 static const float kGoldenData[] = {18, 2, 5, 18, 2, 5, 17, 4, 3, 37, 4, 3};
 
+static TfLiteConvParams common_conv_params = {
+    kTfLitePaddingValid,  // padding
+    2,                    // stride_width
+    2,                    // stride_height
+    1,                    // dilation_width_factor
+    1,                    // dilation_height_factor
+    kTfLiteActNone,       // activation
+};
+
 template <typename T>
 TfLiteStatus ValidateConvGoldens(TfLiteTensor* tensors, int tensors_size,
-                                 const T* expected_output_data, T* output_data,
-                                 int output_length,
-                                 TfLiteFusedActivation activation,
-                                 float tolerance = 1e-5) {
+                                 const T* expected_output_data, T* output_data,
+                                 int output_length,
+                                 TfLiteConvParams* conv_params,
+                                 float tolerance = 1e-5) {
   TfLiteContext context;
   PopulateContext(tensors, tensors_size, &context);
 
@@ -55,16 +63,7 @@
 
   TF_LITE_MICRO_EXPECT_NE(nullptr, registration);
 
-  TfLiteConvParams builtin_data = {
-      kTfLitePaddingValid,  // padding
-      2,                    // stride_width
-      2,                    // stride_height
-      1,                    // dilation_width_factor
-      1,                    // dilation_height_factor
-      activation,           // activation
-  };
-
-  const char* init_data = reinterpret_cast<const char*>(&builtin_data);
+  const char* init_data = reinterpret_cast<const char*>(conv_params);
   size_t init_data_size = 0;
   void* user_data = nullptr;
 
@@ -84,7 +83,7 @@
   node.outputs = outputs_array;
   node.temporaries = temporaries_array;
   node.user_data = user_data;
-  node.builtin_data = reinterpret_cast<void*>(&builtin_data);
+  node.builtin_data = reinterpret_cast<void*>(conv_params);
   node.custom_initial_data = nullptr;
   node.custom_initial_data_size = 0;
   node.delegate = nullptr;
@@ -113,7 +112,7 @@
                    const int* bias_dims_data, const float* bias_data,
                    const int* output_dims_data,
                    const float* expected_output_data, float* output_data,
-                   TfLiteFusedActivation activation) {
+                   TfLiteConvParams* conv_params) {
   TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data);
   TfLiteIntArray* filter_dims = IntArrayFromInts(filter_dims_data);
   TfLiteIntArray* bias_dims = IntArrayFromInts(bias_dims_data);
@@ -132,7 +131,7 @@
   TF_LITE_MICRO_EXPECT_EQ(
       kTfLiteOk,
       ValidateConvGoldens(tensors, tensors_size, expected_output_data,
-                          output_data, output_dims_count, activation));
+                          output_data, output_dims_count, conv_params));
 }
 
 void TestConvQuantizedPerLayer(
@@ -142,7 +141,7 @@
     const int* bias_dims_data, const float* bias_data, int32_t* bias_quantized,
     const int* output_dims_data, const float* expected_output_data,
     uint8_t* expected_output_quantized, uint8_t* output_data,
-    float output_scale, TfLiteFusedActivation activation) {
+    float output_scale, TfLiteConvParams* conv_params) {
   TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data);
   TfLiteIntArray* filter_dims = IntArrayFromInts(filter_dims_data);
   TfLiteIntArray* bias_dims = IntArrayFromInts(bias_dims_data);
@@ -176,7 +175,7 @@
   TF_LITE_MICRO_EXPECT_EQ(
       kTfLiteOk,
       ValidateConvGoldens(tensors, tensors_size, expected_output_quantized,
-                          output_data, output_dims_count, activation));
+                          output_data, output_dims_count, conv_params));
 }
 
 void TestConvQuantizedPerChannel(
@@ -187,7 +186,7 @@
     int32_t* bias_data_quantized, float* bias_scales, int* bias_zero_points,
     const int* output_dims_data, const float* expected_output_data,
     int8_t* expected_output_data_quantized, int8_t* output_data,
-    float output_scale, TfLiteFusedActivation activation) {
+    float output_scale, TfLiteConvParams* conv_params) {
   TfLiteIntArray* input_dims = IntArrayFromInts(input_dims_data);
   TfLiteIntArray* filter_dims = IntArrayFromInts(filter_dims_data);
   TfLiteIntArray* bias_dims = IntArrayFromInts(bias_dims_data);
@@ -241,7 +240,7 @@
   TF_LITE_MICRO_EXPECT_EQ(
       kTfLiteOk,
       ValidateConvGoldens(tensors, tensors_size, expected_output_data_quantized,
-                          output_data, output_dims_count, activation));
+                          output_data, output_dims_count, conv_params));
 }
 
 }  // namespace
@@ -258,7 +257,7 @@
       tflite::testing::kFilterShape, tflite::testing::kFilterData,
       tflite::testing::kBiasShape, tflite::testing::kBiasData,
       tflite::testing::kOutputShape, tflite::testing::kGoldenData, output_data,
-      kTfLiteActNone);
+      &tflite::testing::common_conv_params);
 }
 
 TF_LITE_MICRO_TEST(InputAndFilterSameWidthHeight) {
@@ -275,7 +274,7 @@
   tflite::testing::TestConvFloat(
       tflite::testing::kInputShape, tflite::testing::kInputData, kFilterShape,
       filter_values, kBiasShape, bias_values, kOutputShape, expected_output,
-      output_data, kTfLiteActNone);
+      output_data, &tflite::testing::common_conv_params);
 }
 
 TF_LITE_MICRO_TEST(SimpleTestQuantized) {
@@ -297,7 +296,8 @@
       tflite::testing::kFilterData, filter_quantized, filter_scale,
       tflite::testing::kBiasShape, tflite::testing::kBiasData, bias_quantized,
       tflite::testing::kOutputShape, tflite::testing::kGoldenData,
-      golden_quantized, output_data, output_scale, kTfLiteActNone);
+      golden_quantized, output_data, output_scale,
+      &tflite::testing::common_conv_params);
 }
 
 TF_LITE_MICRO_TEST(SimpleTestQuantizedPerChannel) {
@@ -322,7 +322,54 @@
       tflite::testing::kBiasShape, tflite::testing::kBiasData, bias_quantized,
       scales, zero_points, tflite::testing::kOutputShape,
       tflite::testing::kGoldenData, golden_quantized, output_data, output_scale,
-      kTfLiteActNone);
+      &tflite::testing::common_conv_params);
+}
+
+TF_LITE_MICRO_TEST(Kernel1x1QuantizedPerChannel) {
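+  // Covers a 1x1 convolution with unit strides and valid padding.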
+  // conv params:
+  // padding, stride_<width,height>, dilation_<width, height>, activation
+  TfLiteConvParams conv_params = {kTfLitePaddingValid, 1, 1, 1, 1,
+                                  kTfLiteActNone};
+  const int kInputShape[] = {4, 1, 2, 2, 4};  // [len, N, H, W, C]
+  const int kInputElements =
+      kInputShape[1] * kInputShape[2] * kInputShape[3] * kInputShape[4];
+  const float kInputData[/* kInputElements */] = {1, 1, 1, 1, 2, 2, 2, 2,
+                                                  1, 2, 3, 4, 1, 2, 3, 4};
+  const int kFilterShape[] = {4, 3, 1, 1, 4};  // [len, out_ch, H, W, in_ch]
+  const int kFilterElements =
+      kFilterShape[1] * kFilterShape[2] * kFilterShape[3] * kFilterShape[4];
+  const float kFilterData[/* kFilterElements */] = {
+      1, 2, 3, 4,    // filter 0
+      -1, 1, -1, 1,  // filter 1
+      -1, -1, 1, 1,  // filter 2
+  };
+  const int kBiasElements = kFilterShape[1];
+  const int kBiasShape[] = {1, kBiasElements};
+  const float kBiasData[/* kBiasElements */] = {1, 2, 3};
+  const int kOutputShape[] = {4, 1, 2, 2, kBiasElements};
+  const int kOutputElements = 4 * 3;  // 2x2 output pixels x 3 channels.
+  int8_t output_data[kOutputElements];
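+  // E.g. first output pixel, filter 0: dot([1, 1, 1, 1], [1, 2, 3, 4]) = 10,
+  // plus bias 1 -> 11; filters 1 and 2 dot to 0, leaving biases 2 and 3.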
+  const float kGoldenData[/* kOutputElements */] = {11, 2, 3, 21, 2, 3,
+                                                    31, 4, 7, 31, 4, 7};
+
+  const float input_scale = 0.5f;
+  const float bias_scale = 0.5f;
+  const float output_scale = 1.0f;
+
+  int8_t input_quantized[kInputElements];
+  int8_t filter_quantized[kFilterElements];
+  int32_t bias_quantized[kBiasElements];
+  int8_t golden_quantized[kOutputElements];
+  int zero_points[kBiasElements + 1];
+  float scales[kBiasElements + 1];
+
+  tflite::testing::TestConvQuantizedPerChannel(
+      kInputShape, kInputData, input_quantized, input_scale, kFilterShape,
+      kFilterData, filter_quantized, kBiasShape, kBiasData, bias_quantized,
+      scales, zero_points, kOutputShape, kGoldenData, golden_quantized,
+      output_data, output_scale, &conv_params);
 }
 
 TF_LITE_MICRO_TEST(FilterDimsNotMatchingAffineQuantization) {
@@ -394,7 +441,8 @@
   TF_LITE_MICRO_EXPECT_EQ(
       kTfLiteError, tflite::testing::ValidateConvGoldens(
                         tensors, tensors_size, golden_quantized, output_data,
-                        output_dims_count, kTfLiteActNone));
+                        output_dims_count,
+                        &tflite::testing::common_conv_params));
 
   // Set scale back to correct dimension, and make zero point array too short.
   quant->scale->size = tflite::testing::kFilterShape[0];
@@ -402,7 +450,8 @@
   TF_LITE_MICRO_EXPECT_EQ(
       kTfLiteError, tflite::testing::ValidateConvGoldens(
                         tensors, tensors_size, golden_quantized, output_data,
-                        output_dims_count, kTfLiteActNone));
+                        output_dims_count,
+                        &tflite::testing::common_conv_params));
 }
 
 TF_LITE_MICRO_TESTS_END