Support quantized FULLY_CONNECTED op in XNNPACK delegate

PiperOrigin-RevId: 369341848
Change-Id: I121c317ba6ef7975a1af06940d55093872c1ab7f
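
For reference, a minimal sketch of how a client exercises the new path.
This is illustrative only: the file and function names are hypothetical,
the model is assumed to contain a signed 8-bit quantized FULLY_CONNECTED
op, and the build is assumed to enable quantized inference in the delegate
(the tests below link against the xnnpack_delegate_test_mode variant for
that reason).

    #include <memory>

    #include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h"
    #include "tensorflow/lite/interpreter.h"
    #include "tensorflow/lite/kernels/register.h"
    #include "tensorflow/lite/model.h"

    void RunQuantizedFullyConnected() {
      // Error handling omitted for brevity.
      auto model = tflite::FlatBufferModel::BuildFromFile("model.tflite");
      tflite::ops::builtin::BuiltinOpResolver resolver;
      std::unique_ptr<tflite::Interpreter> interpreter;
      tflite::InterpreterBuilder(*model, resolver)(&interpreter);

      // The delegate claims the quantized FULLY_CONNECTED node if supported.
      TfLiteXNNPackDelegateOptions options =
          TfLiteXNNPackDelegateOptionsDefault();
      TfLiteDelegate* delegate = TfLiteXNNPackDelegateCreate(&options);
      interpreter->ModifyGraphWithDelegate(delegate);

      interpreter->AllocateTensors();
      // ... fill interpreter->typed_input_tensor<int8_t>(0) here ...
      interpreter->Invoke();

      // The delegate must outlive the interpreter that uses it.
      interpreter.reset();
      TfLiteXNNPackDelegateDelete(delegate);
    }

The new coverage can be run with:

    bazel test //tensorflow/lite/delegates/xnnpack:quantized_fully_connected_test
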
diff --git a/tensorflow/lite/delegates/xnnpack/BUILD b/tensorflow/lite/delegates/xnnpack/BUILD
index e793043..5d99239 100644
--- a/tensorflow/lite/delegates/xnnpack/BUILD
+++ b/tensorflow/lite/delegates/xnnpack/BUILD
@@ -276,6 +276,23 @@
 )
 
 cc_library(
+    name = "quantized_fully_connected_tester",
+    testonly = 1,
+    srcs = ["quantized_fully_connected_tester.cc"],
+    hdrs = ["quantized_fully_connected_tester.h"],
+    deps = [
+        "//tensorflow/lite:framework",
+        "//tensorflow/lite:schema_fbs_version",
+        "//tensorflow/lite/c:common",
+        "//tensorflow/lite/kernels:builtin_ops",
+        "//tensorflow/lite/schema:schema_conversion_utils",
+        "//tensorflow/lite/schema:schema_fbs",
+        "@com_google_googletest//:gtest",
+        "@flatbuffers",
+    ],
+)
+
+cc_library(
     name = "reduce_tester",
     testonly = 1,
     srcs = ["reduce_tester.cc"],
@@ -765,6 +782,21 @@
 )
 
 cc_test(
+    name = "quantized_fully_connected_test",
+    srcs = ["quantized_fully_connected_test.cc"],
+    linkopts = select({
+        "//tensorflow:emscripten": EMSCRIPTEN_LINKOPTS,
+        "//conditions:default": [],
+    }),
+    deps = [
+        ":quantized_fully_connected_tester",
+        ":test_main",
+        ":xnnpack_delegate_test_mode",
+        "@com_google_googletest//:gtest",
+    ],
+)
+
+cc_test(
     name = "relu_test",
     srcs = ["relu_test.cc"],
     linkopts = select({
diff --git a/tensorflow/lite/delegates/xnnpack/quantized_fully_connected_test.cc b/tensorflow/lite/delegates/xnnpack/quantized_fully_connected_test.cc
new file mode 100644
index 0000000..0b7927b
--- /dev/null
+++ b/tensorflow/lite/delegates/xnnpack/quantized_fully_connected_test.cc
@@ -0,0 +1,329 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <cstdint>
+#include <functional>
+#include <memory>
+#include <random>
+
+#include <gtest/gtest.h>
+#include "tensorflow/lite/delegates/xnnpack/quantized_fully_connected_tester.h"
+#include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h"
+
+namespace tflite {
+namespace xnnpack {
+
+TEST(QuantizedFullyConnected, 1D) {
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto channels_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 9), std::ref(rng));
+  const auto input_channels = channels_rng();
+  const auto output_channels = channels_rng();
+
+  QuantizedFullyConnectedTester()
+      .InputShape({input_channels})
+      .InputChannels(input_channels)
+      .OutputChannels(output_channels)
+      .Test(xnnpack_delegate.get());
+}
+
+TEST(QuantizedFullyConnected, 1DKeepDims) {
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto channels_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 9), std::ref(rng));
+  const auto input_channels = channels_rng();
+  const auto output_channels = channels_rng();
+
+  QuantizedFullyConnectedTester()
+      .InputShape({input_channels})
+      .InputChannels(input_channels)
+      .OutputChannels(output_channels)
+      .KeepDims(true)
+      .Test(xnnpack_delegate.get());
+}
+
+TEST(QuantizedFullyConnected, 2D) {
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto batch_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  auto channels_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 9), std::ref(rng));
+  const auto batch = batch_rng();
+  const auto input_channels = channels_rng();
+  const auto output_channels = channels_rng();
+
+  QuantizedFullyConnectedTester()
+      .InputShape({batch, input_channels})
+      .InputChannels(input_channels)
+      .OutputChannels(output_channels)
+      .Test(xnnpack_delegate.get());
+}
+
+TEST(QuantizedFullyConnected, 2DKeepDims) {
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto batch_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  auto channels_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 9), std::ref(rng));
+  const auto batch = batch_rng();
+  const auto input_channels = channels_rng();
+  const auto output_channels = channels_rng();
+
+  QuantizedFullyConnectedTester()
+      .InputShape({batch, input_channels})
+      .InputChannels(input_channels)
+      .OutputChannels(output_channels)
+      .KeepDims(true)
+      .Test(xnnpack_delegate.get());
+}
+
+TEST(QuantizedFullyConnected, 3D) {
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto shape_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  auto channels_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 9), std::ref(rng));
+  const auto batch = shape_rng();
+  const auto width = shape_rng();
+  const auto input_channels = channels_rng();
+  const auto output_channels = channels_rng();
+
+  QuantizedFullyConnectedTester()
+      .InputShape({batch, width, input_channels})
+      .InputChannels(input_channels)
+      .OutputChannels(output_channels)
+      .Test(xnnpack_delegate.get());
+}
+
+TEST(QuantizedFullyConnected, 3DReshape) {
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto shape_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  auto channels_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 9), std::ref(rng));
+  const auto batch = shape_rng();
+  const auto width = shape_rng();
+  const auto input_channels = channels_rng();
+  const auto output_channels = channels_rng();
+
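+  // InputChannels() spans the two innermost dimensions, so the delegate must
+  // flatten the 3D input into a {batch, width * input_channels} matrix.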
+  QuantizedFullyConnectedTester()
+      .InputShape({batch, width, input_channels})
+      .InputChannels(width * input_channels)
+      .OutputChannels(output_channels)
+      .Test(xnnpack_delegate.get());
+}
+
+TEST(QuantizedFullyConnected, 3DKeepDims) {
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto shape_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  auto channels_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 9), std::ref(rng));
+  const auto batch = shape_rng();
+  const auto width = shape_rng();
+  const auto input_channels = channels_rng();
+  const auto output_channels = channels_rng();
+
+  QuantizedFullyConnectedTester()
+      .InputShape({batch, width, input_channels})
+      .InputChannels(input_channels)
+      .OutputChannels(output_channels)
+      .KeepDims(true)
+      .Test(xnnpack_delegate.get());
+}
+
+TEST(QuantizedFullyConnected, 4D) {
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto shape_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  auto channels_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 9), std::ref(rng));
+  const auto batch = shape_rng();
+  const auto height = shape_rng();
+  const auto width = shape_rng();
+  const auto input_channels = channels_rng();
+  const auto output_channels = channels_rng();
+
+  QuantizedFullyConnectedTester()
+      .InputShape({batch, height, width, input_channels})
+      .InputChannels(input_channels)
+      .OutputChannels(output_channels)
+      .Test(xnnpack_delegate.get());
+}
+
+TEST(QuantizedFullyConnected, 4DKeepDims) {
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto shape_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  auto channels_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 9), std::ref(rng));
+  const auto batch = shape_rng();
+  const auto height = shape_rng();
+  const auto width = shape_rng();
+  const auto input_channels = channels_rng();
+  const auto output_channels = channels_rng();
+
+  QuantizedFullyConnectedTester()
+      .InputShape({batch, height, width, input_channels})
+      .InputChannels(input_channels)
+      .OutputChannels(output_channels)
+      .KeepDims(true)
+      .Test(xnnpack_delegate.get());
+}
+
+TEST(QuantizedFullyConnected, ReluActivation) {
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto batch_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  auto channels_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 9), std::ref(rng));
+  const auto batch = batch_rng();
+  const auto input_channels = channels_rng();
+  const auto output_channels = channels_rng();
+
+  QuantizedFullyConnectedTester()
+      .InputShape({batch, input_channels})
+      .InputChannels(input_channels)
+      .OutputChannels(output_channels)
+      .ReluActivation()
+      .Test(xnnpack_delegate.get());
+}
+
+TEST(QuantizedFullyConnected, Relu6Activation) {
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto batch_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  auto channels_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 9), std::ref(rng));
+  const auto batch = batch_rng();
+  const auto input_channels = channels_rng();
+  const auto output_channels = channels_rng();
+
+  QuantizedFullyConnectedTester()
+      .InputShape({batch, input_channels})
+      .InputChannels(input_channels)
+      .OutputChannels(output_channels)
+      .Relu6Activation()
+      .Test(xnnpack_delegate.get());
+}
+
+TEST(QuantizedFullyConnected, ReluMinus1To1Activation) {
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto batch_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  auto channels_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 9), std::ref(rng));
+  const auto batch = batch_rng();
+  const auto input_channels = channels_rng();
+  const auto output_channels = channels_rng();
+
+  QuantizedFullyConnectedTester()
+      .InputShape({batch, input_channels})
+      .InputChannels(input_channels)
+      .OutputChannels(output_channels)
+      .ReluMinus1To1Activation()
+      .Test(xnnpack_delegate.get());
+}
+
+TEST(QuantizedFullyConnected, MultiThreading) {
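+  // Run the delegate with a two-thread pool to cover its multi-threaded path.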
+  TfLiteXNNPackDelegateOptions delegate_options =
+      TfLiteXNNPackDelegateOptionsDefault();
+  delegate_options.num_threads = 2;
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto batch_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  auto channels_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 9), std::ref(rng));
+  const auto batch = batch_rng();
+  const auto input_channels = channels_rng();
+  const auto output_channels = channels_rng();
+
+  QuantizedFullyConnectedTester()
+      .InputShape({batch, input_channels})
+      .InputChannels(input_channels)
+      .OutputChannels(output_channels)
+      .Test(xnnpack_delegate.get());
+}
+
+}  // namespace xnnpack
+}  // namespace tflite
diff --git a/tensorflow/lite/delegates/xnnpack/quantized_fully_connected_tester.cc b/tensorflow/lite/delegates/xnnpack/quantized_fully_connected_tester.cc
new file mode 100644
index 0000000..1b967b1
--- /dev/null
+++ b/tensorflow/lite/delegates/xnnpack/quantized_fully_connected_tester.cc
@@ -0,0 +1,255 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/delegates/xnnpack/quantized_fully_connected_tester.h"
+
+#include <algorithm>
+#include <array>
+#include <cstdint>
+#include <cstdlib>
+#include <functional>
+#include <limits>
+#include <memory>
+#include <numeric>
+#include <random>
+#include <vector>
+
+#include <gtest/gtest.h>
+#include "flatbuffers/flatbuffers.h"  // from @flatbuffers
+#include "tensorflow/lite/interpreter.h"
+#include "tensorflow/lite/kernels/register.h"
+#include "tensorflow/lite/model.h"
+#include "tensorflow/lite/schema/schema_conversion_utils.h"
+#include "tensorflow/lite/schema/schema_generated.h"
+#include "tensorflow/lite/version.h"
+
+namespace tflite {
+namespace xnnpack {
+
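+// Expected output shape: with KeepDims() the input rank is preserved and
+// only the innermost dimension is replaced by OutputChannels(); otherwise
+// the input is flattened into a 2D tensor of shape
+// {InputSize() / InputChannels(), OutputChannels()}.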
+std::vector<int32_t> QuantizedFullyConnectedTester::OutputShape() const {
+  EXPECT_NE(input_shape_.size(), 0);
+  if (KeepDims()) {
+    std::vector<int32_t> output_shape(input_shape_.cbegin(),
+                                      input_shape_.cend() - 1);
+    output_shape.push_back(OutputChannels());
+    return output_shape;
+  } else {
+    EXPECT_EQ(InputSize() % InputChannels(), 0);
+    return std::vector<int32_t>(
+        {InputSize() / InputChannels(), OutputChannels()});
+  }
+}
+
+void QuantizedFullyConnectedTester::Test(TfLiteDelegate* delegate) const {
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto input_rng = std::bind(std::uniform_int_distribution<int32_t>(
+                                 std::numeric_limits<int8_t>::min(),
+                                 std::numeric_limits<int8_t>::max()),
+                             std::ref(rng));
+
+  std::vector<char> buffer = CreateTfLiteModel();
+  const Model* model = GetModel(buffer.data());
+
+  std::unique_ptr<Interpreter> delegate_interpreter;
+  ASSERT_EQ(
+      InterpreterBuilder(
+          model,
+          ::tflite::ops::builtin::BuiltinOpResolverWithoutDefaultDelegates())(
+          &delegate_interpreter),
+      kTfLiteOk);
+  std::unique_ptr<Interpreter> default_interpreter;
+  ASSERT_EQ(
+      InterpreterBuilder(
+          model,
+          ::tflite::ops::builtin::BuiltinOpResolverWithoutDefaultDelegates())(
+          &default_interpreter),
+      kTfLiteOk);
+
+  ASSERT_TRUE(delegate_interpreter);
+  ASSERT_TRUE(default_interpreter);
+
+  ASSERT_EQ(delegate_interpreter->inputs().size(), 1);
+  ASSERT_EQ(default_interpreter->inputs().size(), 1);
+
+  ASSERT_EQ(delegate_interpreter->outputs().size(), 1);
+  ASSERT_EQ(default_interpreter->outputs().size(), 1);
+
+  ASSERT_EQ(delegate_interpreter->AllocateTensors(), kTfLiteOk);
+  ASSERT_EQ(default_interpreter->AllocateTensors(), kTfLiteOk);
+
+  ASSERT_EQ(delegate_interpreter->ModifyGraphWithDelegate(delegate), kTfLiteOk);
+
+  int8_t* default_input_data = default_interpreter->typed_tensor<int8_t>(
+      default_interpreter->inputs()[0]);
+  std::generate(default_input_data, default_input_data + InputSize(),
+                std::ref(input_rng));
+
+  int8_t* delegate_input_data = delegate_interpreter->typed_tensor<int8_t>(
+      delegate_interpreter->inputs()[0]);
+  std::copy(default_input_data, default_input_data + InputSize(),
+            delegate_input_data);
+
+  ASSERT_EQ(default_interpreter->Invoke(), kTfLiteOk);
+  ASSERT_EQ(delegate_interpreter->Invoke(), kTfLiteOk);
+
+  int8_t* default_output_data = default_interpreter->typed_tensor<int8_t>(
+      default_interpreter->outputs()[0]);
+  int8_t* delegate_output_data = delegate_interpreter->typed_tensor<int8_t>(
+      delegate_interpreter->outputs()[0]);
+
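+  // Allow the delegate to deviate from the reference kernel by at most one
+  // quantized unit to absorb requantization rounding differences.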
+  for (size_t i = 0; i < ComputeSize(OutputShape()); i++) {
+    ASSERT_LE(std::abs(static_cast<int32_t>(default_output_data[i]) -
+                       static_cast<int32_t>(delegate_output_data[i])),
+              1);
+  }
+}
+
+std::vector<char> QuantizedFullyConnectedTester::CreateTfLiteModel() const {
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
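+  // Keep filter values in [-127, 127]: per the TFLite quantization spec,
+  // symmetric int8 weights never use -128.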
+  auto filter_rng = std::bind(std::uniform_int_distribution<int32_t>(
+                                  -std::numeric_limits<int8_t>::max(),
+                                  std::numeric_limits<int8_t>::max()),
+                              std::ref(rng));
+  auto bias_rng = std::bind(
+      std::uniform_int_distribution<int32_t>(-10000, 10000), std::ref(rng));
+
+  flatbuffers::FlatBufferBuilder builder;
+  const std::array<flatbuffers::Offset<OperatorCode>, 1> operator_codes{
+      {CreateOperatorCode(builder, BuiltinOperator_FULLY_CONNECTED)}};
+  std::vector<flatbuffers::Offset<Operator>> operators;
+
+  std::vector<int8_t> filter_data(InputChannels() * OutputChannels());
+  std::generate(filter_data.begin(), filter_data.end(), std::ref(filter_rng));
+  std::vector<int32_t> bias_data(OutputChannels());
+  std::generate(bias_data.begin(), bias_data.end(), std::ref(bias_rng));
+
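+  // Buffer 0 is the empty sentinel buffer required by the TFLite schema;
+  // buffers 1 and 2 hold the static filter and bias data.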
+  const std::array<flatbuffers::Offset<Buffer>, 3> buffers{{
+      CreateBuffer(builder, builder.CreateVector({})),
+      CreateBuffer(builder,
+                   builder.CreateVector(
+                       reinterpret_cast<const uint8_t*>(filter_data.data()),
+                       sizeof(int8_t) * filter_data.size())),
+      CreateBuffer(builder,
+                   builder.CreateVector(
+                       reinterpret_cast<const uint8_t*>(bias_data.data()),
+                       sizeof(int32_t) * bias_data.size())),
+  }};
+
+  const std::array<int32_t, 2> filter_shape{
+      {OutputChannels(), InputChannels()}};
+  const std::array<int32_t, 1> bias_shape{{OutputChannels()}};
+
+  const std::vector<int32_t> output_shape = OutputShape();
+  const std::array<flatbuffers::Offset<Tensor>, 4> tensors{{
+      CreateTensor(builder,
+                   builder.CreateVector<int32_t>(InputShape().data(),
+                                                 InputShape().size()),
+                   TensorType_INT8, /*buffer=*/0, /*name=*/0,
+                   CreateQuantizationParameters(
+                       builder, /*min=*/0, /*max=*/0,
+                       builder.CreateVector<float>({InputScale()}),
+                       builder.CreateVector<int64_t>({InputZeroPoint()}))),
+      CreateTensor(builder,
+                   builder.CreateVector<int32_t>(filter_shape.data(),
+                                                 filter_shape.size()),
+                   TensorType_INT8, /*buffer=*/1, /*name=*/0,
+                   CreateQuantizationParameters(
+                       builder, /*min=*/0, /*max=*/0,
+                       builder.CreateVector<float>({FilterScale()}),
+                       builder.CreateVector<int64_t>({0}))),
+      CreateTensor(
+          builder,
+          builder.CreateVector<int32_t>(bias_shape.data(), bias_shape.size()),
+          TensorType_INT32, /*buffer=*/2, /*name=*/0,
+          CreateQuantizationParameters(
+              builder, /*min=*/0, /*max=*/0,
+              builder.CreateVector<float>({InputScale() * FilterScale()}),
+              builder.CreateVector<int64_t>({0}))),
+      CreateTensor(builder,
+                   builder.CreateVector<int32_t>(output_shape.data(),
+                                                 output_shape.size()),
+                   TensorType_INT8, /*buffer=*/0, /*name=*/0,
+                   CreateQuantizationParameters(
+                       builder, /*min=*/0, /*max=*/0,
+                       builder.CreateVector<float>({OutputScale()}),
+                       builder.CreateVector<int64_t>({OutputZeroPoint()}))),
+  }};
+
+  flatbuffers::Offset<FullyConnectedOptions> fully_connected_options =
+      CreateFullyConnectedOptions(builder, Activation(),
+                                  FullyConnectedOptionsWeightsFormat_DEFAULT,
+                                  KeepDims());
+
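+  // The operator reads tensors 0 (input), 1 (filter), and 2 (bias), and
+  // writes tensor 3 (output); only the input and output are subgraph I/O.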
+  const std::array<int32_t, 3> op_inputs{
+      {static_cast<int>(tensors.size()) - 4,
+       static_cast<int>(tensors.size()) - 3,
+       static_cast<int>(tensors.size()) - 2}};
+  const std::array<int32_t, 1> op_outputs{
+      {static_cast<int>(tensors.size()) - 1}};
+  operators.emplace_back(CreateOperator(
+      builder, /*opcode_index=*/0,
+      builder.CreateVector<int32_t>(op_inputs.data(), op_inputs.size()),
+      builder.CreateVector<int32_t>(op_outputs.data(), op_outputs.size()),
+      BuiltinOptions_FullyConnectedOptions, fully_connected_options.Union()));
+
+  const std::array<int32_t, 1> subgraph_inputs{
+      {static_cast<int>(tensors.size()) - 4}};
+  const std::array<int32_t, 1> subgraph_outputs{
+      {static_cast<int>(tensors.size()) - 1}};
+  flatbuffers::Offset<SubGraph> subgraph = CreateSubGraph(
+      builder, builder.CreateVector(tensors.data(), tensors.size()),
+      builder.CreateVector<int32_t>(subgraph_inputs.data(),
+                                    subgraph_inputs.size()),
+      builder.CreateVector<int32_t>(subgraph_outputs.data(),
+                                    subgraph_outputs.size()),
+      builder.CreateVector(operators.data(), operators.size()));
+
+  flatbuffers::Offset<flatbuffers::String> description =
+      builder.CreateString("Fully Connected model");
+
+  flatbuffers::Offset<Model> model_buffer = CreateModel(
+      builder, TFLITE_SCHEMA_VERSION,
+      builder.CreateVector(operator_codes.data(), operator_codes.size()),
+      builder.CreateVector(&subgraph, 1), description,
+      builder.CreateVector(buffers.data(), buffers.size()));
+
+  builder.Finish(model_buffer);
+
+  return std::vector<char>(builder.GetBufferPointer(),
+                           builder.GetBufferPointer() + builder.GetSize());
+}
+
+int32_t QuantizedFullyConnectedTester::ComputeSize(
+    const std::vector<int32_t>& shape) {
+  return std::accumulate(shape.cbegin(), shape.cend(), 1,
+                         std::multiplies<int32_t>());
+}
+
+}  // namespace xnnpack
+}  // namespace tflite
diff --git a/tensorflow/lite/delegates/xnnpack/quantized_fully_connected_tester.h b/tensorflow/lite/delegates/xnnpack/quantized_fully_connected_tester.h
new file mode 100644
index 0000000..0e708ef
--- /dev/null
+++ b/tensorflow/lite/delegates/xnnpack/quantized_fully_connected_tester.h
@@ -0,0 +1,159 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_DELEGATES_XNNPACK_QUANTIZED_FULLY_CONNECTED_TESTER_H_
+#define TENSORFLOW_LITE_DELEGATES_XNNPACK_QUANTIZED_FULLY_CONNECTED_TESTER_H_
+
+#include <cstdint>
+#include <initializer_list>
+#include <vector>
+
+#include <gtest/gtest.h>
+#include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/schema/schema_generated.h"
+
+namespace tflite {
+namespace xnnpack {
+
+class QuantizedFullyConnectedTester {
+ public:
+  QuantizedFullyConnectedTester() = default;
+  QuantizedFullyConnectedTester(const QuantizedFullyConnectedTester&) = delete;
+  QuantizedFullyConnectedTester& operator=(
+      const QuantizedFullyConnectedTester&) = delete;
+
+  inline QuantizedFullyConnectedTester& InputShape(
+      std::initializer_list<int32_t> shape) {
+    for (auto it = shape.begin(); it != shape.end(); ++it) {
+      EXPECT_GT(*it, 0);
+    }
+    input_shape_ = std::vector<int32_t>(shape.begin(), shape.end());
+    input_size_ = ComputeSize(input_shape_);
+    return *this;
+  }
+
+  inline const std::vector<int32_t>& InputShape() const { return input_shape_; }
+
+  inline int32_t InputSize() const { return input_size_; }
+
+  inline QuantizedFullyConnectedTester& InputChannels(int32_t input_channels) {
+    EXPECT_GT(input_channels, 0);
+    input_channels_ = input_channels;
+    return *this;
+  }
+
+  inline int32_t InputChannels() const { return input_channels_; }
+
+  inline QuantizedFullyConnectedTester& OutputChannels(
+      int32_t output_channels) {
+    EXPECT_GT(output_channels, 0);
+    output_channels_ = output_channels;
+    return *this;
+  }
+
+  inline int32_t OutputChannels() const { return output_channels_; }
+
+  std::vector<int32_t> OutputShape() const;
+
+  inline QuantizedFullyConnectedTester& InputZeroPoint(
+      int8_t input_zero_point) {
+    input_zero_point_ = input_zero_point;
+    return *this;
+  }
+
+  inline int8_t InputZeroPoint() const { return input_zero_point_; }
+
+  inline QuantizedFullyConnectedTester& OutputZeroPoint(
+      int8_t output_zero_point) {
+    output_zero_point_ = output_zero_point;
+    return *this;
+  }
+
+  inline int8_t OutputZeroPoint() const { return output_zero_point_; }
+
+  inline QuantizedFullyConnectedTester& InputScale(float input_scale) {
+    input_scale_ = input_scale;
+    return *this;
+  }
+
+  inline float InputScale() const { return input_scale_; }
+
+  inline QuantizedFullyConnectedTester& FilterScale(float filter_scale) {
+    filter_scale_ = filter_scale;
+    return *this;
+  }
+
+  inline float FilterScale() const { return filter_scale_; }
+
+  inline QuantizedFullyConnectedTester& OutputScale(float output_scale) {
+    output_scale_ = output_scale;
+    return *this;
+  }
+
+  inline float OutputScale() const { return output_scale_; }
+
+  inline QuantizedFullyConnectedTester& KeepDims(bool keep_dims) {
+    keep_dims_ = keep_dims;
+    return *this;
+  }
+
+  inline bool KeepDims() const { return keep_dims_; }
+
+  inline QuantizedFullyConnectedTester& ReluActivation() {
+    activation_ = ::tflite::ActivationFunctionType_RELU;
+    return *this;
+  }
+
+  inline QuantizedFullyConnectedTester& Relu6Activation() {
+    activation_ = ::tflite::ActivationFunctionType_RELU6;
+    return *this;
+  }
+
+  inline QuantizedFullyConnectedTester& ReluMinus1To1Activation() {
+    activation_ = ::tflite::ActivationFunctionType_RELU_N1_TO_1;
+    return *this;
+  }
+
+  void Test(TfLiteDelegate* delegate) const;
+
+ private:
+  std::vector<char> CreateTfLiteModel() const;
+
+  inline ::tflite::ActivationFunctionType Activation() const {
+    return activation_;
+  }
+
+  static int32_t ComputeSize(const std::vector<int32_t>& shape);
+
+  std::vector<int32_t> input_shape_;
+  int32_t input_size_ = 1;
+  int32_t input_channels_ = 1;
+  int32_t output_channels_ = 1;
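+  // Arbitrary but representative default quantization parameters; tests can
+  // override them through the setters above.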
+  int8_t input_zero_point_ = 0;
+  int8_t output_zero_point_ = 0;
+  float input_scale_ = 0.8f;
+  float filter_scale_ = 0.75f;
+  float output_scale_ = 1.5f;
+  bool keep_dims_ = false;
+  ::tflite::ActivationFunctionType activation_ =
+      ::tflite::ActivationFunctionType_NONE;
+};
+
+}  // namespace xnnpack
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_DELEGATES_XNNPACK_QUANTIZED_FULLY_CONNECTED_TESTER_H_
diff --git a/tensorflow/lite/tools/cmake/modules/xnnpack.cmake b/tensorflow/lite/tools/cmake/modules/xnnpack.cmake
index bbeca1a..3405b64 100644
--- a/tensorflow/lite/tools/cmake/modules/xnnpack.cmake
+++ b/tensorflow/lite/tools/cmake/modules/xnnpack.cmake
@@ -22,7 +22,7 @@
 OverridableFetchContent_Declare(
   xnnpack
   GIT_REPOSITORY https://github.com/google/XNNPACK
-  GIT_TAG ec56b7ee14e4b186bfb95f96a69784c14fdea016
+  GIT_TAG 8f15372eb67ffab0d54cfe3752acaf8f8415af17
   GIT_PROGRESS TRUE
   PREFIX "${CMAKE_BINARY_DIR}"
   SOURCE_DIR "${CMAKE_BINARY_DIR}/xnnpack"
diff --git a/tensorflow/workspace2.bzl b/tensorflow/workspace2.bzl
index 8c00240..4a2764e 100644
--- a/tensorflow/workspace2.bzl
+++ b/tensorflow/workspace2.bzl
@@ -127,11 +127,11 @@
     # and update the sha256 with the result.
     tf_http_archive(
         name = "XNNPACK",
-        sha256 = "5d35210ad504daa901a85fe0df4f699cdaf11428371192ef4cf5d9b2aca0549d",
-        strip_prefix = "XNNPACK-ec56b7ee14e4b186bfb95f96a69784c14fdea016",
+        sha256 = "5482fb0fcdc1df8b4842f8edf944443ea67ffe712a5cd846f0af484abe4f9a79",
+        strip_prefix = "XNNPACK-8f15372eb67ffab0d54cfe3752acaf8f8415af17",
         urls = [
-            "https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/XNNPACK/archive/ec56b7ee14e4b186bfb95f96a69784c14fdea016.zip",
-            "https://github.com/google/XNNPACK/archive/ec56b7ee14e4b186bfb95f96a69784c14fdea016.zip",
+            "https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/XNNPACK/archive/8f15372eb67ffab0d54cfe3752acaf8f8415af17.zip",
+            "https://github.com/google/XNNPACK/archive/8f15372eb67ffab0d54cfe3752acaf8f8415af17.zip",
         ],
     )