Implement optimized Softmax kernel using CMSIS-NN
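
For int8 inputs the kernel dispatches to CMSIS-NN's arm_softmax_s8; float and
uint8 inputs fall back to the portable reference implementation. The
quantization parameters (input multiplier, left shift and diff_min) are
derived in Eval via PreprocessSoftmaxScaling, mirroring the reference kernel.

A minimal sketch of the int8 dispatch, assuming the arm_softmax_s8 signature
from arm_nnfunctions.h (num_rows is the product of the leading dimensions,
row_size the innermost dimension; variable names below are illustrative):

    // Illustrative only: see the kernel below for the actual call sites.
    arm_softmax_s8(input_data, /*num_rows=*/outer_size, /*row_size=*/depth,
                   input_multiplier, input_left_shift, diff_min, output_data);

The reference kernel's dimension error message is updated to mention 1D, and
tests are added for signed-int8 softmax over 1D, 2D and 4D inputs.
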
diff --git a/tensorflow/lite/micro/kernels/cmsis-nn/softmax.cc b/tensorflow/lite/micro/kernels/cmsis-nn/softmax.cc
new file mode 100644
index 0000000..beaecd6
--- /dev/null
+++ b/tensorflow/lite/micro/kernels/cmsis-nn/softmax.cc
@@ -0,0 +1,228 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/kernels/internal/reference/softmax.h"
+#include "arm_nnfunctions.h"
+#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
+#include "tensorflow/lite/kernels/kernel_util.h"
+
+namespace tflite {
+namespace ops {
+namespace micro {
+namespace activations {
+namespace {
+
+struct OpData {
+  int32_t input_multiplier = 0;
+  int input_left_shift = 0;
+  int32_t input_range_radius = 0;
+  int diff_min = 0;
+};
+
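+// Computes the fixed-point parameters (input multiplier, left shift and
+// diff_min) used by the quantized softmax paths. Float inputs need no
+// precomputation, so this is a no-op for them.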
+TfLiteStatus CalculateSoftmaxOpData(TfLiteContext* context,
+                                    const TfLiteTensor* input,
+                                    TfLiteTensor* output,
+                                    const TfLiteSoftmaxParams* params,
+                                    OpData* data) {
+  if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8) {
+    if (input->type == kTfLiteUInt8) {
+      TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
+    } else {
+      TF_LITE_ENSURE_EQ(context, output->params.zero_point, -128);
+    }
+
+    TF_LITE_ENSURE(context, (output->params.scale == 1.f / 256) ||
+                                (output->params.scale == 1.f / 255));
+
+    static const int kScaledDiffIntegerBits = 5;
+
+    tflite::PreprocessSoftmaxScaling(
+        params->beta, input->params.scale, kScaledDiffIntegerBits,
+        &data->input_multiplier, &data->input_left_shift);
+    data->diff_min = -1.0 * tflite::CalculateInputRadius(
+                                kScaledDiffIntegerBits, data->input_left_shift);
+  }
+  return kTfLiteOk;
+}
+
+}  // namespace
+
+void* Init(TfLiteContext* context, const char* buffer, size_t length) {
+  return nullptr;
+}
+
+void Free(TfLiteContext* context, void* buffer) {}
+
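+// The quantization parameters are recomputed on every invocation of Eval, so
+// there is nothing to prepare or allocate here.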
+TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) {
+  return kTfLiteOk;
+}
+
+// Takes a 1D tensor and performs softmax along it.
+void Softmax1DFloat(const TfLiteTensor* input, TfLiteTensor* output,
+                    TfLiteSoftmaxParams* params) {
+  const int input_size = input->dims->data[0];
+  tflite::reference_ops::Softmax(input->data.f, input_size, 1, params->beta,
+                                 output->data.f);
+}
+
+// Takes a 2D tensor and performs softmax along the last dimension.
+void Softmax2DFloat(const TfLiteTensor* input, TfLiteTensor* output,
+                    TfLiteSoftmaxParams* params) {
+  const int batch_size = input->dims->data[0];
+  const int input_size = input->dims->data[1];
+  tflite::reference_ops::Softmax(input->data.f, input_size, batch_size,
+                                 params->beta, output->data.f);
+}
+
+void Softmax1DQuantized(const TfLiteTensor* input, TfLiteTensor* output,
+                        TfLiteSoftmaxParams* params, OpData* data) {
+  const int input_size = input->dims->data[0];
+  const int32_t shape_data[4] = {1, 1, 1, input_size};
+  RuntimeShape shape(4, shape_data);
+  SoftmaxParams op_params;
+  op_params.input_multiplier = data->input_multiplier;
+  op_params.input_left_shift = data->input_left_shift;
+  op_params.diff_min = data->diff_min;
+  if (input->type == kTfLiteUInt8) {
+    tflite::reference_ops::Softmax(op_params, shape,
+                                   GetTensorData<uint8_t>(input), shape,
+                                   GetTensorData<uint8_t>(output));
+  } else {
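+    // int8: dispatch to the CMSIS-NN softmax over a single row of
+    // input_size elements.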
+    arm_softmax_s8(GetTensorData<int8_t>(input), shape_data[0], shape_data[3],
+                   op_params.input_multiplier, op_params.input_left_shift,
+                   op_params.diff_min, GetTensorData<int8_t>(output));
+  }
+}
+
+void Softmax2DQuantized(const TfLiteTensor* input, TfLiteTensor* output,
+                        TfLiteSoftmaxParams* params, OpData* data) {
+  const int batch_size = input->dims->data[0];
+  const int input_size = input->dims->data[1];
+  const int32_t shape_data[4] = {batch_size, 1, 1, input_size};
+  RuntimeShape shape(4, shape_data);
+  SoftmaxParams op_params;
+  op_params.input_multiplier = data->input_multiplier;
+  op_params.input_left_shift = data->input_left_shift;
+  op_params.diff_min = data->diff_min;
+  if (input->type == kTfLiteUInt8) {
+    tflite::reference_ops::Softmax(op_params, shape,
+                                   GetTensorData<uint8_t>(input), shape,
+                                   GetTensorData<uint8_t>(output));
+  } else {
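+    // int8: CMSIS-NN softmax over batch_size rows of input_size elements.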
+    arm_softmax_s8(GetTensorData<int8_t>(input), shape_data[0], shape_data[3],
+                   op_params.input_multiplier, op_params.input_left_shift,
+                   op_params.diff_min, GetTensorData<int8_t>(output));
+  }
+}
+
+// Takes a 4D tensor and performs softmax along the fourth dimension.
+void Softmax4DFloat(const TfLiteTensor* input, TfLiteTensor* output,
+                    TfLiteSoftmaxParams* params) {
+  SoftmaxParams op_params;
+  op_params.beta = params->beta;
+  tflite::reference_ops::Softmax(
+      op_params, GetTensorShape(input), GetTensorData<float>(input),
+      GetTensorShape(output), GetTensorData<float>(output));
+}
+
+void Softmax4DQuantized(const TfLiteTensor* input, TfLiteTensor* output,
+                        TfLiteSoftmaxParams* params, OpData* data) {
+  SoftmaxParams op_params;
+  op_params.input_multiplier = data->input_multiplier;
+  op_params.input_left_shift = data->input_left_shift;
+  op_params.diff_min = data->diff_min;
+  if (input->type == kTfLiteUInt8) {
+    tflite::reference_ops::Softmax(
+        op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
+        GetTensorShape(output), GetTensorData<uint8_t>(output));
+  } else {
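+    // int8: the three leading dimensions collapse into the CMSIS-NN row
+    // count; softmax runs over the innermost dimension.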
+    arm_softmax_s8(
+        GetTensorData<int8_t>(input),
+        input->dims->data[0] * input->dims->data[1] * input->dims->data[2],
+        input->dims->data[3], op_params.input_multiplier,
+        op_params.input_left_shift, op_params.diff_min,
+        GetTensorData<int8_t>(output));
+  }
+}
+
+TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
+  auto* params = reinterpret_cast<TfLiteSoftmaxParams*>(node->builtin_data);
+
+  const TfLiteTensor* input = GetInput(context, node, 0);
+  TfLiteTensor* output = GetOutput(context, node, 0);
+
+  OpData local_data_object;
+  OpData* data = &local_data_object;
+  TF_LITE_ENSURE_STATUS(
+      CalculateSoftmaxOpData(context, input, output, params, data));
+
+  switch (input->type) {
+    case kTfLiteFloat32: {
+      if (NumDimensions(input) == 1) {
+        Softmax1DFloat(input, output, params);
+        return kTfLiteOk;
+      }
+      if (NumDimensions(input) == 2) {
+        Softmax2DFloat(input, output, params);
+        return kTfLiteOk;
+      }
+      if (NumDimensions(input) == 4) {
+        Softmax4DFloat(input, output, params);
+        return kTfLiteOk;
+      }
+      context->ReportError(
+          context, "Only 1D, 2D and 4D tensors supported currently, got %dD.",
+          NumDimensions(input));
+      return kTfLiteError;
+    }
+    case kTfLiteUInt8:
+    case kTfLiteInt8: {
+      if (NumDimensions(input) == 1) {
+        Softmax1DQuantized(input, output, params, data);
+        return kTfLiteOk;
+      }
+      if (NumDimensions(input) == 2) {
+        Softmax2DQuantized(input, output, params, data);
+        return kTfLiteOk;
+      }
+      if (NumDimensions(input) == 4) {
+        Softmax4DQuantized(input, output, params, data);
+        return kTfLiteOk;
+      }
+      context->ReportError(
+          context, "Only 2D and 4D tensors supported currently, got %dD.",
+          NumDimensions(input));
+      return kTfLiteError;
+    }
+    default:
+      context->ReportError(
+          context,
+          "Only float32, uint8_t and int8_t supported currently, got %d.",
+          input->type);
+      return kTfLiteError;
+  }
+}
+}  // namespace activations
+
+TfLiteRegistration* Register_SOFTMAX() {
+  static TfLiteRegistration r = {activations::Init, activations::Free,
+                                 activations::SoftmaxPrepare,
+                                 activations::SoftmaxEval};
+  return &r;
+}
+
+}  // namespace micro
+}  // namespace ops
+}  // namespace tflite
diff --git a/tensorflow/lite/micro/kernels/softmax.cc b/tensorflow/lite/micro/kernels/softmax.cc
index 41b7311..e3b6a96 100644
--- a/tensorflow/lite/micro/kernels/softmax.cc
+++ b/tensorflow/lite/micro/kernels/softmax.cc
@@ -238,7 +238,7 @@
         return kTfLiteOk;
       }
       TF_LITE_KERNEL_LOG(context,
-                         "Only 2D and 4D tensors supported currently, got %dD.",
+                         "Only 1D, 2D and 4D tensors supported currently, got %dD.",
                          NumDimensions(input));
       return kTfLiteError;
     }
diff --git a/tensorflow/lite/micro/kernels/softmax_test.cc b/tensorflow/lite/micro/kernels/softmax_test.cc
index 379c6c9..0e7715c 100644
--- a/tensorflow/lite/micro/kernels/softmax_test.cc
+++ b/tensorflow/lite/micro/kernels/softmax_test.cc
@@ -250,7 +250,7 @@
       output_data);
 }
 
-TF_LITE_MICRO_TEST(SimpleTestQuantized) {
+TF_LITE_MICRO_TEST(SimpleTestQuantizedUnsigned) {
   using tflite::testing::F2Q;
 
   const float input_min = -63.5f;
@@ -322,4 +322,360 @@
       output_data);
 }
 
+TF_LITE_MICRO_TEST(SimpleTestQuantizedSigned1D) {
+  using tflite::testing::F2QS;
+
+  const float input_min = -63.5f;
+  const float input_max = 64.0f;
+  const float output_min = 0.0f;
+  const float output_max = (255.0f / 256.0f);
+  const int output_dims_count = 5;
+  int8_t output_data[output_dims_count];
+  tflite::testing::TestSoftmaxQuantizedSigned(  //
+      {1, 5},                                   // Input shape.
+      {
+          F2QS(1.0, input_min, input_max),
+          F2QS(2.0, input_min, input_max),
+          F2QS(3.0, input_min, input_max),
+          F2QS(4.0, input_min, input_max),
+          F2QS(5.0, input_min, input_max),
+      },
+      input_min, input_max,  // Input quantized range.
+      {
+          // Expected results.
+          F2QS(0.011656231, output_min, output_max),
+          F2QS(0.031684921, output_min, output_max),
+          F2QS(0.086128544, output_min, output_max),
+          F2QS(0.234121657, output_min, output_max),
+          F2QS(0.636408647, output_min, output_max),
+      },
+      {1, 5},                  // Output shape.
+      output_min, output_max,  // Output quantized range.
+      output_data);
+}
+
+TF_LITE_MICRO_TEST(SimpleTestQuantizedSigned2D) {
+  using tflite::testing::F2QS;
+
+  const float input_min = -63.5f;
+  const float input_max = 64.0f;
+  const float output_min = 0.0f;
+  const float output_max = (255.0f / 256.0f);
+  const int output_dims_count = 10;
+  int8_t output_data[output_dims_count];
+  tflite::testing::TestSoftmaxQuantizedSigned(  //
+      {2, 2, 5},                                // Input shape.
+      {                                         // h = 0
+       F2QS(-3.0, input_min, input_max), F2QS(5.0, input_min, input_max),
+       F2QS(-7.0, input_min, input_max), F2QS(9.0, input_min, input_max),
+       F2QS(-11.0, input_min, input_max),
+       // h = 1
+       F2QS(1.0, input_min, input_max), F2QS(2.0, input_min, input_max),
+       F2QS(3.0, input_min, input_max), F2QS(4.0, input_min, input_max),
+       F2QS(5.0, input_min, input_max)},
+      input_min, input_max,  // Input quantized range.
+      {
+          // Expected results.
+          // h = 0
+          F2QS(0.000006034, output_min, output_max),
+          F2QS(0.017986099, output_min, output_max),
+          F2QS(0.000000111, output_min, output_max),
+          F2QS(0.982007754, output_min, output_max),
+          F2QS(0.000000002, output_min, output_max),
+          // h = 1
+          F2QS(0.011656231, output_min, output_max),
+          F2QS(0.031684921, output_min, output_max),
+          F2QS(0.086128544, output_min, output_max),
+          F2QS(0.234121657, output_min, output_max),
+          F2QS(0.636408647, output_min, output_max),
+      },
+      {2, 2, 5},               // Output shape.
+      output_min, output_max,  // Output quantized range.
+      output_data);
+}
+
+TF_LITE_MICRO_TEST(SimpleTestQuantizedSigned4D) {
+  using tflite::testing::F2QS;
+
+  const float input_min = -63.5f;
+  const float input_max = 64.0f;
+  const float output_min = 0.0f;
+  const float output_max = (255.0f / 256.0f);
+  const int output_dims_count = 120;
+  int8_t output_data[output_dims_count];
+  tflite::testing::TestSoftmaxQuantizedSigned(  //
+      {4, 2, 3, 4, 5},                          // Input shape.
+      {                                         // n = 0
+       // c = 0
+       // h = 0
+       F2QS(3.00, input_min, input_max), F2QS(6.00, input_min, input_max),
+       F2QS(-5.00, input_min, input_max), F2QS(4.00, input_min, input_max),
+       F2QS(-9.00, input_min, input_max),
+       // h = 1
+       F2QS(-10.00, input_min, input_max), F2QS(-10.00, input_min, input_max),
+       F2QS(-8.00, input_min, input_max), F2QS(2.00, input_min, input_max),
+       F2QS(2.00, input_min, input_max),
+       // h = 2
+       F2QS(8.00, input_min, input_max), F2QS(-5.00, input_min, input_max),
+       F2QS(-8.00, input_min, input_max), F2QS(5.00, input_min, input_max),
+       F2QS(-6.00, input_min, input_max),
+       // h = 3
+       F2QS(-8.00, input_min, input_max), F2QS(6.00, input_min, input_max),
+       F2QS(1.00, input_min, input_max), F2QS(-10.00, input_min, input_max),
+       F2QS(-8.00, input_min, input_max),
+
+       // c = 1
+       // h = 0
+       F2QS(7.00, input_min, input_max), F2QS(6.00, input_min, input_max),
+       F2QS(-10.00, input_min, input_max), F2QS(-4.00, input_min, input_max),
+       F2QS(-5.00, input_min, input_max),
+       // h = 1
+       F2QS(2.00, input_min, input_max), F2QS(7.00, input_min, input_max),
+       F2QS(9.00, input_min, input_max), F2QS(-9.00, input_min, input_max),
+       F2QS(7.00, input_min, input_max),
+       // h = 2
+       F2QS(-4.00, input_min, input_max), F2QS(-2.00, input_min, input_max),
+       F2QS(8.00, input_min, input_max), F2QS(2.00, input_min, input_max),
+       F2QS(2.00, input_min, input_max),
+       // h = 3
+       F2QS(3.00, input_min, input_max), F2QS(6.00, input_min, input_max),
+       F2QS(6.00, input_min, input_max), F2QS(2.00, input_min, input_max),
+       F2QS(4.00, input_min, input_max),
+
+       // c = 2
+       // h = 0
+       F2QS(9.00, input_min, input_max), F2QS(7.00, input_min, input_max),
+       F2QS(-7.00, input_min, input_max), F2QS(0.00, input_min, input_max),
+       F2QS(4.00, input_min, input_max),
+       // h = 1
+       F2QS(-3.00, input_min, input_max), F2QS(8.00, input_min, input_max),
+       F2QS(8.00, input_min, input_max), F2QS(-3.00, input_min, input_max),
+       F2QS(-4.00, input_min, input_max),
+       // h = 2
+       F2QS(-9.00, input_min, input_max), F2QS(-9.00, input_min, input_max),
+       F2QS(4.00, input_min, input_max), F2QS(-8.00, input_min, input_max),
+       F2QS(-1.00, input_min, input_max),
+       // h = 3
+       F2QS(-10.00, input_min, input_max), F2QS(-2.00, input_min, input_max),
+       F2QS(6.00, input_min, input_max), F2QS(-7.00, input_min, input_max),
+       F2QS(0.00, input_min, input_max),
+
+       // n = 1
+       // c = 0
+       // h = 0
+       F2QS(-9.00, input_min, input_max), F2QS(-8.00, input_min, input_max),
+       F2QS(6.00, input_min, input_max), F2QS(-1.00, input_min, input_max),
+       F2QS(-5.00, input_min, input_max),
+       // h = 1
+       F2QS(-10.00, input_min, input_max), F2QS(-5.00, input_min, input_max),
+       F2QS(-10.00, input_min, input_max), F2QS(7.00, input_min, input_max),
+       F2QS(-2.00, input_min, input_max),
+       // h = 2
+       F2QS(-5.00, input_min, input_max), F2QS(-4.00, input_min, input_max),
+       F2QS(1.00, input_min, input_max), F2QS(2.00, input_min, input_max),
+       F2QS(2.00, input_min, input_max),
+       // h = 3
+       F2QS(-2.00, input_min, input_max), F2QS(-2.00, input_min, input_max),
+       F2QS(1.00, input_min, input_max), F2QS(1.00, input_min, input_max),
+       F2QS(-4.00, input_min, input_max),
+
+       // c = 1
+       // h = 0
+       F2QS(-8.00, input_min, input_max), F2QS(-3.00, input_min, input_max),
+       F2QS(1.00, input_min, input_max), F2QS(1.00, input_min, input_max),
+       F2QS(-1.00, input_min, input_max),
+       // h = 1
+       F2QS(-2.00, input_min, input_max), F2QS(6.00, input_min, input_max),
+       F2QS(-1.00, input_min, input_max), F2QS(-5.00, input_min, input_max),
+       F2QS(6.00, input_min, input_max),
+       // h = 2
+       F2QS(-7.00, input_min, input_max), F2QS(8.00, input_min, input_max),
+       F2QS(9.00, input_min, input_max), F2QS(0.00, input_min, input_max),
+       F2QS(9.00, input_min, input_max),
+       // h = 3
+       F2QS(-9.00, input_min, input_max), F2QS(-5.00, input_min, input_max),
+       F2QS(-2.00, input_min, input_max), F2QS(0.00, input_min, input_max),
+       F2QS(8.00, input_min, input_max),
+
+       // c = 2
+       // h = 0
+       F2QS(4.00, input_min, input_max), F2QS(2.00, input_min, input_max),
+       F2QS(-3.00, input_min, input_max), F2QS(5.00, input_min, input_max),
+       F2QS(8.00, input_min, input_max),
+       // h = 1
+       F2QS(-1.00, input_min, input_max), F2QS(1.00, input_min, input_max),
+       F2QS(-4.00, input_min, input_max), F2QS(-9.00, input_min, input_max),
+       F2QS(7.00, input_min, input_max),
+       // h = 2
+       F2QS(3.00, input_min, input_max), F2QS(-8.00, input_min, input_max),
+       F2QS(0.00, input_min, input_max), F2QS(9.00, input_min, input_max),
+       F2QS(-4.00, input_min, input_max),
+       // h = 3
+       F2QS(8.00, input_min, input_max), F2QS(-1.00, input_min, input_max),
+       F2QS(9.00, input_min, input_max), F2QS(-9.00, input_min, input_max),
+       F2QS(1.00, input_min, input_max)},
+      input_min, input_max,  // Input quantized range.
+      {                      // Expected results.
+       // n = 0
+       // c = 0
+       // h = 0
+       F2QS(0.042009463, output_min, output_max),
+       F2QS(0.843782625, output_min, output_max),
+       F2QS(0.000014093, output_min, output_max),
+       F2QS(0.114193561, output_min, output_max),
+       F2QS(0.000000258, output_min, output_max),
+       // h = 1
+       F2QS(0.000003072, output_min, output_max),
+       F2QS(0.000003072, output_min, output_max),
+       F2QS(0.000022699, output_min, output_max),
+       F2QS(0.499985578, output_min, output_max),
+       F2QS(0.499985578, output_min, output_max),
+       // h = 2
+       F2QS(0.952571219, output_min, output_max),
+       F2QS(0.000002153, output_min, output_max),
+       F2QS(0.000000107, output_min, output_max),
+       F2QS(0.047425728, output_min, output_max),
+       F2QS(0.000000792, output_min, output_max),
+       // h = 3
+       F2QS(0.000000826, output_min, output_max),
+       F2QS(0.993305397, output_min, output_max),
+       F2QS(0.006692839, output_min, output_max),
+       F2QS(0.000000112, output_min, output_max),
+       F2QS(0.000000826, output_min, output_max),
+
+       // c = 1
+       // h = 0
+       F2QS(0.731046347, output_min, output_max),
+       F2QS(0.268936922, output_min, output_max),
+       F2QS(0.000000030, output_min, output_max),
+       F2QS(0.000012210, output_min, output_max),
+       F2QS(0.000004492, output_min, output_max),
+       // h = 1
+       F2QS(0.000717124, output_min, output_max),
+       F2QS(0.106430599, output_min, output_max),
+       F2QS(0.786421666, output_min, output_max),
+       F2QS(0.000000012, output_min, output_max),
+       F2QS(0.106430599, output_min, output_max),
+       // h = 2
+       F2QS(0.000006114, output_min, output_max),
+       F2QS(0.000045174, output_min, output_max),
+       F2QS(0.995015917, output_min, output_max),
+       F2QS(0.002466398, output_min, output_max),
+       F2QS(0.002466398, output_min, output_max),
+       // h = 3
+       F2QS(0.022595176, output_min, output_max),
+       F2QS(0.453836234, output_min, output_max),
+       F2QS(0.453836234, output_min, output_max),
+       F2QS(0.008312301, output_min, output_max),
+       F2QS(0.061420055, output_min, output_max),
+
+       // c = 2
+       // h = 0
+       F2QS(0.875505904, output_min, output_max),
+       F2QS(0.118486839, output_min, output_max),
+       F2QS(0.000000099, output_min, output_max),
+       F2QS(0.000108046, output_min, output_max),
+       F2QS(0.005899112, output_min, output_max),
+       // h = 1
+       F2QS(0.000008351, output_min, output_max),
+       F2QS(0.499990113, output_min, output_max),
+       F2QS(0.499990113, output_min, output_max),
+       F2QS(0.000008351, output_min, output_max),
+       F2QS(0.000003072, output_min, output_max),
+       // h = 2
+       F2QS(0.000002245, output_min, output_max),
+       F2QS(0.000002245, output_min, output_max),
+       F2QS(0.993296627, output_min, output_max),
+       F2QS(0.000006103, output_min, output_max),
+       F2QS(0.006692780, output_min, output_max),
+       // h = 3
+       F2QS(0.000000112, output_min, output_max),
+       F2QS(0.000334520, output_min, output_max),
+       F2QS(0.997191323, output_min, output_max),
+       F2QS(0.000002254, output_min, output_max),
+       F2QS(0.002471790, output_min, output_max),
+
+       // n = 1
+       // c = 0
+       // h = 0
+       F2QS(0.000000306, output_min, output_max),
+       F2QS(0.000000831, output_min, output_max),
+       F2QS(0.999071142, output_min, output_max),
+       F2QS(0.000911035, output_min, output_max),
+       F2QS(0.000016686, output_min, output_max),
+       // h = 1
+       F2QS(0.000000041, output_min, output_max),
+       F2QS(0.000006143, output_min, output_max),
+       F2QS(0.000000041, output_min, output_max),
+       F2QS(0.999870380, output_min, output_max),
+       F2QS(0.000123394, output_min, output_max),
+       // h = 2
+       F2QS(0.000384554, output_min, output_max),
+       F2QS(0.001045327, output_min, output_max),
+       F2QS(0.155140254, output_min, output_max),
+       F2QS(0.421714933, output_min, output_max),
+       F2QS(0.421714933, output_min, output_max),
+       // h = 3
+       F2QS(0.023637081, output_min, output_max),
+       F2QS(0.023637081, output_min, output_max),
+       F2QS(0.474763454, output_min, output_max),
+       F2QS(0.474763454, output_min, output_max),
+       F2QS(0.003198931, output_min, output_max),
+
+       // c = 1
+       // h = 0
+       F2QS(0.000057299, output_min, output_max),
+       F2QS(0.008503973, output_min, output_max),
+       F2QS(0.464301197, output_min, output_max),
+       F2QS(0.464301197, output_min, output_max),
+       F2QS(0.062836334, output_min, output_max),
+       // h = 1
+       F2QS(0.000167625, output_min, output_max),
+       F2QS(0.499684188, output_min, output_max),
+       F2QS(0.000455653, output_min, output_max),
+       F2QS(0.000008346, output_min, output_max),
+       F2QS(0.499684188, output_min, output_max),
+       // h = 2
+       F2QS(0.000000048, output_min, output_max),
+       F2QS(0.155354299, output_min, output_max),
+       F2QS(0.422296769, output_min, output_max),
+       F2QS(0.000052116, output_min, output_max),
+       F2QS(0.422296769, output_min, output_max),
+       // h = 3
+       F2QS(0.000000041, output_min, output_max),
+       F2QS(0.000002259, output_min, output_max),
+       F2QS(0.000045383, output_min, output_max),
+       F2QS(0.000335334, output_min, output_max),
+       F2QS(0.999616982, output_min, output_max),
+
+       // c = 2
+       // h = 0
+       F2QS(0.017107856, output_min, output_max),
+       F2QS(0.002315297, output_min, output_max),
+       F2QS(0.000015600, output_min, output_max),
+       F2QS(0.046503973, output_min, output_max),
+       F2QS(0.934057274, output_min, output_max),
+       // h = 1
+       F2QS(0.000334516, output_min, output_max),
+       F2QS(0.002471755, output_min, output_max),
+       F2QS(0.000016655, output_min, output_max),
+       F2QS(0.000000112, output_min, output_max),
+       F2QS(0.997176963, output_min, output_max),
+       // h = 2
+       F2QS(0.002472313, output_min, output_max),
+       F2QS(0.000000041, output_min, output_max),
+       F2QS(0.000123089, output_min, output_max),
+       F2QS(0.997402302, output_min, output_max),
+       F2QS(0.000002254, output_min, output_max),
+       // h = 3
+       F2QS(0.268866557, output_min, output_max),
+       F2QS(0.000033181, output_min, output_max),
+       F2QS(0.730855076, output_min, output_max),
+       F2QS(0.000000011, output_min, output_max),
+       F2QS(0.000245175, output_min, output_max)},
+      {4, 2, 3, 4, 5},         // Output shape.
+      output_min, output_max,  // Output quantized range.
+      output_data);
+}
+
 TF_LITE_MICRO_TESTS_END