Support quantized FULLY_CONNECTED op in XNNPACK delegate
PiperOrigin-RevId: 369341848
Change-Id: I121c317ba6ef7975a1af06940d55093872c1ab7f
diff --git a/tensorflow/lite/delegates/xnnpack/BUILD b/tensorflow/lite/delegates/xnnpack/BUILD
index e793043..5d99239 100644
--- a/tensorflow/lite/delegates/xnnpack/BUILD
+++ b/tensorflow/lite/delegates/xnnpack/BUILD
@@ -276,6 +276,23 @@
)
cc_library(
+ name = "quantized_fully_connected_tester",
+ testonly = 1,
+ srcs = ["quantized_fully_connected_tester.cc"],
+ hdrs = ["quantized_fully_connected_tester.h"],
+ deps = [
+ "//tensorflow/lite:framework",
+ "//tensorflow/lite:schema_fbs_version",
+ "//tensorflow/lite/c:common",
+ "//tensorflow/lite/kernels:builtin_ops",
+ "//tensorflow/lite/schema:schema_conversion_utils",
+ "//tensorflow/lite/schema:schema_fbs",
+ "@com_google_googletest//:gtest",
+ "@flatbuffers",
+ ],
+)
+
+cc_library(
name = "reduce_tester",
testonly = 1,
srcs = ["reduce_tester.cc"],
@@ -765,6 +782,21 @@
)
cc_test(
+ name = "quantized_fully_connected_test",
+ srcs = ["quantized_fully_connected_test.cc"],
+ linkopts = select({
+ "//tensorflow:emscripten": EMSCRIPTEN_LINKOPTS,
+ "//conditions:default": [],
+ }),
+ deps = [
+ ":quantized_fully_connected_tester",
+ ":test_main",
+ ":xnnpack_delegate_test_mode",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_test(
name = "relu_test",
srcs = ["relu_test.cc"],
linkopts = select({
diff --git a/tensorflow/lite/delegates/xnnpack/quantized_fully_connected_test.cc b/tensorflow/lite/delegates/xnnpack/quantized_fully_connected_test.cc
new file mode 100644
index 0000000..0b7927b
--- /dev/null
+++ b/tensorflow/lite/delegates/xnnpack/quantized_fully_connected_test.cc
@@ -0,0 +1,326 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <cstdint>
+#include <functional>
+#include <memory>
+#include <random>
+
+#include <gtest/gtest.h>
+#include "tensorflow/lite/delegates/xnnpack/quantized_fully_connected_tester.h"
+#include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h"
+
+namespace tflite {
+namespace xnnpack {
+
+TEST(QuantizedFullyConnected, 1D) {  // 1-D input collapses to a single-row matmul.
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto channels_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 9), std::ref(rng));
+  const auto input_channels = channels_rng();
+  const auto output_channels = channels_rng();
+
+  QuantizedFullyConnectedTester()
+      .InputShape({input_channels})
+      .InputChannels(input_channels)
+      .OutputChannels(output_channels)
+      .Test(xnnpack_delegate.get());
+}
+
+TEST(QuantizedFullyConnected, 1DKeepDims) {  // keep_num_dims: output stays rank-1.
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto channels_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 9), std::ref(rng));
+  const auto input_channels = channels_rng();
+  const auto output_channels = channels_rng();
+
+  QuantizedFullyConnectedTester()
+      .InputShape({input_channels})
+      .InputChannels(input_channels)
+      .OutputChannels(output_channels)
+      .KeepDims(true)
+      .Test(xnnpack_delegate.get());
+}
+
+TEST(QuantizedFullyConnected, 2D) {  // Canonical [batch, channels] input.
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto batch_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  auto channels_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 9), std::ref(rng));
+  const auto batch = batch_rng();
+  const auto input_channels = channels_rng();
+  const auto output_channels = channels_rng();
+
+  QuantizedFullyConnectedTester()
+      .InputShape({batch, input_channels})
+      .InputChannels(input_channels)
+      .OutputChannels(output_channels)
+      .Test(xnnpack_delegate.get());
+}
+
+TEST(QuantizedFullyConnected, 2DKeepDims) {  // 2-D input with keep_num_dims set.
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto batch_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  auto channels_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 9), std::ref(rng));
+  const auto batch = batch_rng();
+  const auto input_channels = channels_rng();
+  const auto output_channels = channels_rng();
+
+  QuantizedFullyConnectedTester()
+      .InputShape({batch, input_channels})
+      .InputChannels(input_channels)
+      .OutputChannels(output_channels)
+      .KeepDims(true)
+      .Test(xnnpack_delegate.get());
+}
+
+TEST(QuantizedFullyConnected, 3D) {  // 3-D input flattened to [batch*width, channels].
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto shape_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  auto channels_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 9), std::ref(rng));
+  const auto batch = shape_rng();
+  const auto width = shape_rng();
+  const auto input_channels = channels_rng();
+  const auto output_channels = channels_rng();
+
+  QuantizedFullyConnectedTester()
+      .InputShape({batch, width, input_channels})
+      .InputChannels(input_channels)
+      .OutputChannels(output_channels)
+      .Test(xnnpack_delegate.get());
+}
+
+TEST(QuantizedFullyConnected, 3DReshape) {  // Width folded into input channels (implicit reshape).
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto shape_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  auto channels_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 9), std::ref(rng));
+  const auto batch = shape_rng();
+  const auto width = shape_rng();
+  const auto input_channels = channels_rng();
+  const auto output_channels = channels_rng();
+
+  QuantizedFullyConnectedTester()
+      .InputShape({batch, width, input_channels})
+      .InputChannels(width * input_channels)
+      .OutputChannels(output_channels)
+      .Test(xnnpack_delegate.get());
+}
+
+TEST(QuantizedFullyConnected, 3DKeepDims) {  // 3-D input with keep_num_dims set.
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto shape_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  auto channels_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 9), std::ref(rng));
+  const auto batch = shape_rng();
+  const auto width = shape_rng();
+  const auto input_channels = channels_rng();
+  const auto output_channels = channels_rng();
+
+  QuantizedFullyConnectedTester()
+      .InputShape({batch, width, input_channels})
+      .InputChannels(input_channels)
+      .OutputChannels(output_channels)
+      .KeepDims(true)
+      .Test(xnnpack_delegate.get());
+}
+
+TEST(QuantizedFullyConnected, 4D) {  // 4-D input flattened across batch/height/width.
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto shape_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  auto channels_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 9), std::ref(rng));
+  const auto batch = shape_rng();
+  const auto height = shape_rng();
+  const auto width = shape_rng();
+  const auto input_channels = channels_rng();
+  const auto output_channels = channels_rng();
+
+  QuantizedFullyConnectedTester()
+      .InputShape({batch, height, width, input_channels})
+      .InputChannels(input_channels)
+      .OutputChannels(output_channels)
+      .Test(xnnpack_delegate.get());
+}
+
+TEST(QuantizedFullyConnected, 4DKeepDims) {  // 4-D input with keep_num_dims set.
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto shape_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  auto channels_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 9), std::ref(rng));
+  const auto batch = shape_rng();
+  const auto height = shape_rng();
+  const auto width = shape_rng();
+  const auto input_channels = channels_rng();
+  const auto output_channels = channels_rng();
+
+  QuantizedFullyConnectedTester()
+      .InputShape({batch, height, width, input_channels})
+      .InputChannels(input_channels)
+      .OutputChannels(output_channels)
+      .KeepDims(true)
+      .Test(xnnpack_delegate.get());
+}
+
+TEST(QuantizedFullyConnected, ReluActivation) {  // Fused RELU activation.
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto batch_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  auto channels_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 9), std::ref(rng));
+  const auto batch = batch_rng();
+  const auto input_channels = channels_rng();
+  const auto output_channels = channels_rng();
+
+  QuantizedFullyConnectedTester()
+      .InputShape({batch, input_channels})
+      .InputChannels(input_channels)
+      .OutputChannels(output_channels)
+      .ReluActivation()
+      .Test(xnnpack_delegate.get());
+}
+
+TEST(QuantizedFullyConnected, Relu6Activation) {  // Fused RELU6 activation.
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto batch_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  auto channels_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 9), std::ref(rng));
+  const auto batch = batch_rng();
+  const auto input_channels = channels_rng();
+  const auto output_channels = channels_rng();
+
+  QuantizedFullyConnectedTester()
+      .InputShape({batch, input_channels})
+      .InputChannels(input_channels)
+      .OutputChannels(output_channels)
+      .Relu6Activation()
+      .Test(xnnpack_delegate.get());
+}
+
+TEST(QuantizedFullyConnected, ReluMinus1To1Activation) {  // Fused RELU_N1_TO_1 activation.
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto batch_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  auto channels_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 9), std::ref(rng));
+  const auto batch = batch_rng();
+  const auto input_channels = channels_rng();
+  const auto output_channels = channels_rng();
+
+  QuantizedFullyConnectedTester()
+      .InputShape({batch, input_channels})
+      .InputChannels(input_channels)
+      .OutputChannels(output_channels)
+      .ReluMinus1To1Activation()
+      .Test(xnnpack_delegate.get());
+}
+
+TEST(QuantizedFullyConnected, MultiThreading) {  // Same comparison with a 2-thread delegate.
+  TfLiteXNNPackDelegateOptions delegate_options =
+      TfLiteXNNPackDelegateOptionsDefault();
+  delegate_options.num_threads = 2;
+  std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
+      xnnpack_delegate(TfLiteXNNPackDelegateCreate(&delegate_options),
+                       TfLiteXNNPackDelegateDelete);
+
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto batch_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 5), std::ref(rng));
+  auto channels_rng =
+      std::bind(std::uniform_int_distribution<int32_t>(2, 9), std::ref(rng));
+  const auto batch = batch_rng();
+  const auto input_channels = channels_rng();
+  const auto output_channels = channels_rng();
+
+  QuantizedFullyConnectedTester()
+      .InputShape({batch, input_channels})
+      .InputChannels(input_channels)
+      .OutputChannels(output_channels)
+      .Test(xnnpack_delegate.get());
+}
+
+} // namespace xnnpack
+} // namespace tflite
diff --git a/tensorflow/lite/delegates/xnnpack/quantized_fully_connected_tester.cc b/tensorflow/lite/delegates/xnnpack/quantized_fully_connected_tester.cc
new file mode 100644
index 0000000..1b967b1
--- /dev/null
+++ b/tensorflow/lite/delegates/xnnpack/quantized_fully_connected_tester.cc
@@ -0,0 +1,240 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/lite/delegates/xnnpack/quantized_fully_connected_tester.h"
+
+#include <array>
+#include <cstdint>
+#include <functional>
+#include <limits>
+#include <numeric>
+#include <random>
+#include <vector>
+
+#include <gtest/gtest.h>
+#include "flatbuffers/flatbuffers.h" // from @flatbuffers
+#include "tensorflow/lite/interpreter.h"
+#include "tensorflow/lite/kernels/register.h"
+#include "tensorflow/lite/model.h"
+#include "tensorflow/lite/schema/schema_conversion_utils.h"
+#include "tensorflow/lite/schema/schema_generated.h"
+#include "tensorflow/lite/version.h"
+
+namespace tflite {
+namespace xnnpack {
+
+std::vector<int32_t> QuantizedFullyConnectedTester::OutputShape() const {  // Shape of the FC output tensor.
+  EXPECT_NE(input_shape_.size(), 0);  // InputShape() must be set before querying.
+  if (KeepDims()) {  // Keep leading dims; innermost dim becomes output channels.
+    std::vector<int32_t> output_shape(input_shape_.cbegin(),
+                                      input_shape_.cend() - 1);
+    output_shape.push_back(OutputChannels());
+    return output_shape;
+  } else {  // Collapse to 2-D [InputSize()/InputChannels(), OutputChannels()].
+    EXPECT_EQ(InputSize() % InputChannels(), 0);  // Input must reshape cleanly.
+    return std::vector<int32_t>(
+        {InputSize() / InputChannels(), OutputChannels()});
+  }
+}
+
+void QuantizedFullyConnectedTester::Test(TfLiteDelegate* delegate) const {  // Compares delegate vs reference.
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto input_rng = std::bind(std::uniform_int_distribution<int32_t>(
+                                 std::numeric_limits<int8_t>::min(),
+                                 std::numeric_limits<int8_t>::max()),
+                             std::ref(rng));  // Random values across the full INT8 range.
+
+  std::vector<char> buffer = CreateTfLiteModel();
+  const Model* model = GetModel(buffer.data());  // Both interpreters share one model buffer.
+
+  std::unique_ptr<Interpreter> delegate_interpreter;
+  ASSERT_EQ(
+      InterpreterBuilder(
+          model,
+          ::tflite::ops::builtin::BuiltinOpResolverWithoutDefaultDelegates())(
+          &delegate_interpreter),
+      kTfLiteOk);
+  std::unique_ptr<Interpreter> default_interpreter;
+  ASSERT_EQ(
+      InterpreterBuilder(
+          model,
+          ::tflite::ops::builtin::BuiltinOpResolverWithoutDefaultDelegates())(
+          &default_interpreter),
+      kTfLiteOk);
+
+  ASSERT_TRUE(delegate_interpreter);
+  ASSERT_TRUE(default_interpreter);
+
+  ASSERT_EQ(delegate_interpreter->inputs().size(), 1);
+  ASSERT_EQ(default_interpreter->inputs().size(), 1);
+
+  ASSERT_EQ(delegate_interpreter->outputs().size(), 1);
+  ASSERT_EQ(default_interpreter->outputs().size(), 1);
+
+  ASSERT_EQ(delegate_interpreter->AllocateTensors(), kTfLiteOk);
+  ASSERT_EQ(default_interpreter->AllocateTensors(), kTfLiteOk);
+
+  ASSERT_EQ(delegate_interpreter->ModifyGraphWithDelegate(delegate), kTfLiteOk);  // Only this one is delegated.
+
+  int8_t* default_input_data = default_interpreter->typed_tensor<int8_t>(
+      default_interpreter->inputs()[0]);
+  std::generate(default_input_data, default_input_data + InputSize(),
+                std::ref(input_rng));  // NOTE(review): std::generate/std::copy need <algorithm>; included transitively — confirm.
+
+  int8_t* delegate_input_data = delegate_interpreter->typed_tensor<int8_t>(
+      delegate_interpreter->inputs()[0]);
+  std::copy(default_input_data, default_input_data + InputSize(),
+            delegate_input_data);  // Feed both interpreters identical inputs.
+
+  ASSERT_EQ(default_interpreter->Invoke(), kTfLiteOk);
+  ASSERT_EQ(delegate_interpreter->Invoke(), kTfLiteOk);
+
+  int8_t* default_output_data = default_interpreter->typed_tensor<int8_t>(
+      default_interpreter->outputs()[0]);
+  int8_t* delegate_output_data = delegate_interpreter->typed_tensor<int8_t>(
+      delegate_interpreter->outputs()[0]);
+
+  for (size_t i = 0; i < ComputeSize(OutputShape()); i++) {
+    ASSERT_LE(std::abs(static_cast<int32_t>(default_output_data[i]) -
+                       static_cast<int32_t>(delegate_output_data[i])),
+              1);  // Tolerate off-by-one quantized rounding differences.
+  }
+}
+
+std::vector<char> QuantizedFullyConnectedTester::CreateTfLiteModel() const {  // Serializes an INT8 FC model.
+  std::random_device random_device;
+  auto rng = std::mt19937(random_device());
+  auto filter_rng = std::bind(std::uniform_int_distribution<int32_t>(
+                                 -std::numeric_limits<int8_t>::max(),
+                                 std::numeric_limits<int8_t>::max()),
+                              std::ref(rng));
+  auto bias_rng = std::bind(
+      std::uniform_int_distribution<int32_t>(-10000, 10000), std::ref(rng));
+
+  flatbuffers::FlatBufferBuilder builder;
+  const std::array<flatbuffers::Offset<OperatorCode>, 1> operator_codes{
+      {CreateOperatorCode(builder, BuiltinOperator_FULLY_CONNECTED)}};
+  std::vector<flatbuffers::Offset<Operator>> operators;
+
+  std::vector<int8_t> filter_data(InputChannels() * OutputChannels());  // INT8: matches tensor type & buffer size below.
+  std::generate(filter_data.begin(), filter_data.end(), std::ref(filter_rng));
+  std::vector<int32_t> bias_data(OutputChannels());  // INT32: matches tensor type & buffer size below.
+  std::generate(bias_data.begin(), bias_data.end(), std::ref(bias_rng));
+
+  const std::array<flatbuffers::Offset<Buffer>, 3> buffers{{
+      CreateBuffer(builder, builder.CreateVector({})),
+      CreateBuffer(builder,
+                   builder.CreateVector(
+                       reinterpret_cast<const uint8_t*>(filter_data.data()),
+                       sizeof(int8_t) * filter_data.size())),
+      CreateBuffer(builder,
+                   builder.CreateVector(
+                       reinterpret_cast<const uint8_t*>(bias_data.data()),
+                       sizeof(int32_t) * bias_data.size())),
+  }};
+
+  const std::array<int32_t, 2> filter_shape{
+      {OutputChannels(), InputChannels()}};
+  const std::array<int32_t, 1> bias_shape{{OutputChannels()}};
+
+  const std::vector<int32_t> output_shape = OutputShape();
+  const std::array<flatbuffers::Offset<Tensor>, 4> tensors{{
+      CreateTensor(builder,
+                   builder.CreateVector<int32_t>(InputShape().data(),
+                                                 InputShape().size()),
+                   TensorType_INT8, /*buffer=*/0, /*name=*/0,
+                   CreateQuantizationParameters(
+                       builder, /*min=*/0, /*max=*/0,
+                       builder.CreateVector<float>({InputScale()}),
+                       builder.CreateVector<int64_t>({InputZeroPoint()}))),
+      CreateTensor(builder,
+                   builder.CreateVector<int32_t>(filter_shape.data(),
+                                                 filter_shape.size()),
+                   TensorType_INT8, /*buffer=*/1, /*name=*/0,
+                   CreateQuantizationParameters(
+                       builder, /*min=*/0, /*max=*/0,
+                       builder.CreateVector<float>({FilterScale()}),
+                       builder.CreateVector<int64_t>({0}))),
+      CreateTensor(
+          builder,
+          builder.CreateVector<int32_t>(bias_shape.data(), bias_shape.size()),
+          TensorType_INT32, /*buffer=*/2, /*name=*/0,
+          CreateQuantizationParameters(
+              builder, /*min=*/0, /*max=*/0,
+              builder.CreateVector<float>({InputScale() * FilterScale()}),
+              builder.CreateVector<int64_t>({0}))),
+      CreateTensor(builder,
+                   builder.CreateVector<int32_t>(output_shape.data(),
+                                                 output_shape.size()),
+                   TensorType_INT8, /*buffer=*/0, /*name=*/0,
+                   CreateQuantizationParameters(
+                       builder, /*min=*/0, /*max=*/0,
+                       builder.CreateVector<float>({OutputScale()}),
+                       builder.CreateVector<int64_t>({OutputZeroPoint()}))),
+  }};
+
+  flatbuffers::Offset<FullyConnectedOptions> fully_connected_options =
+      CreateFullyConnectedOptions(builder, Activation(),
+                                  FullyConnectedOptionsWeightsFormat_DEFAULT,
+                                  KeepDims());
+
+  const std::array<int32_t, 3> op_inputs{
+      {static_cast<int>(tensors.size()) - 4,
+       static_cast<int>(tensors.size()) - 3,
+       static_cast<int>(tensors.size()) - 2}};
+  const std::array<int32_t, 1> op_outputs{
+      {static_cast<int>(tensors.size()) - 1}};
+  operators.emplace_back(CreateOperator(
+      builder, /*opcode_index=*/0,
+      builder.CreateVector<int32_t>(op_inputs.data(), op_inputs.size()),
+      builder.CreateVector<int32_t>(op_outputs.data(), op_outputs.size()),
+      BuiltinOptions_FullyConnectedOptions, fully_connected_options.Union()));
+
+  const std::array<int32_t, 1> subgraph_inputs{
+      {static_cast<int>(tensors.size()) - 4}};
+  const std::array<int32_t, 1> subgraph_outputs{
+      {static_cast<int>(tensors.size()) - 1}};
+  flatbuffers::Offset<SubGraph> subgraph = CreateSubGraph(
+      builder, builder.CreateVector(tensors.data(), tensors.size()),
+      builder.CreateVector<int32_t>(subgraph_inputs.data(),
+                                    subgraph_inputs.size()),
+      builder.CreateVector<int32_t>(subgraph_outputs.data(),
+                                    subgraph_outputs.size()),
+      builder.CreateVector(operators.data(), operators.size()));
+
+  flatbuffers::Offset<flatbuffers::String> description =
+      builder.CreateString("Fully Connected model");
+
+  flatbuffers::Offset<Model> model_buffer = CreateModel(
+      builder, TFLITE_SCHEMA_VERSION,
+      builder.CreateVector(operator_codes.data(), operator_codes.size()),
+      builder.CreateVector(&subgraph, 1), description,
+      builder.CreateVector(buffers.data(), buffers.size()));
+
+  builder.Finish(model_buffer);
+
+  return std::vector<char>(builder.GetBufferPointer(),
+                           builder.GetBufferPointer() + builder.GetSize());
+}
+
+int32_t QuantizedFullyConnectedTester::ComputeSize(
+    const std::vector<int32_t>& shape) {  // Total element count: product of all dims (1 for empty shape).
+  return std::accumulate(shape.cbegin(), shape.cend(), 1,
+                         std::multiplies<int32_t>());
+}
+
+} // namespace xnnpack
+} // namespace tflite
diff --git a/tensorflow/lite/delegates/xnnpack/quantized_fully_connected_tester.h b/tensorflow/lite/delegates/xnnpack/quantized_fully_connected_tester.h
new file mode 100644
index 0000000..0e708ef
--- /dev/null
+++ b/tensorflow/lite/delegates/xnnpack/quantized_fully_connected_tester.h
@@ -0,0 +1,156 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_DELEGATES_XNNPACK_QUANTIZED_FULLY_CONNECTED_TESTER_H_
+#define TENSORFLOW_LITE_DELEGATES_XNNPACK_QUANTIZED_FULLY_CONNECTED_TESTER_H_
+
+#include <cstdint>
+#include <vector>
+
+#include <gtest/gtest.h>
+#include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/schema/schema_generated.h"
+
+namespace tflite {
+namespace xnnpack {
+
+class QuantizedFullyConnectedTester {  // Fluent builder that creates a quantized FC model and checks delegate output.
+ public:
+  QuantizedFullyConnectedTester() = default;
+  QuantizedFullyConnectedTester(const QuantizedFullyConnectedTester&) = delete;
+  QuantizedFullyConnectedTester& operator=(
+      const QuantizedFullyConnectedTester&) = delete;
+
+  inline QuantizedFullyConnectedTester& InputShape(
+      std::initializer_list<int32_t> shape) {
+    for (auto it = shape.begin(); it != shape.end(); ++it) {
+      EXPECT_GT(*it, 0);  // All dimensions must be positive.
+    }
+    input_shape_ = std::vector<int32_t>(shape.begin(), shape.end());
+    input_size_ = ComputeSize(input_shape_);  // Cache element count for Test().
+    return *this;
+  }
+
+  inline const std::vector<int32_t>& InputShape() const { return input_shape_; }
+
+  inline int32_t InputSize() const { return input_size_; }
+
+  inline QuantizedFullyConnectedTester& InputChannels(int32_t input_channels) {
+    EXPECT_GT(input_channels, 0);
+    input_channels_ = input_channels;
+    return *this;
+  }
+
+  inline int32_t InputChannels() const { return input_channels_; }
+
+  inline QuantizedFullyConnectedTester& OutputChannels(
+      int32_t output_channels) {
+    EXPECT_GT(output_channels, 0);
+    output_channels_ = output_channels;
+    return *this;
+  }
+
+  inline int32_t OutputChannels() const { return output_channels_; }
+
+  std::vector<int32_t> OutputShape() const;  // Derived from input shape + KeepDims().
+
+  inline QuantizedFullyConnectedTester& InputZeroPoint(
+      int8_t input_zero_point) {
+    input_zero_point_ = input_zero_point;
+    return *this;
+  }
+
+  inline int8_t InputZeroPoint() const { return input_zero_point_; }
+
+  inline QuantizedFullyConnectedTester& OutputZeroPoint(
+      int8_t output_zero_point) {
+    output_zero_point_ = output_zero_point;
+    return *this;
+  }
+
+  inline int8_t OutputZeroPoint() const { return output_zero_point_; }
+
+  inline QuantizedFullyConnectedTester& InputScale(float input_scale) {
+    input_scale_ = input_scale;
+    return *this;
+  }
+
+  inline float InputScale() const { return input_scale_; }
+
+  inline QuantizedFullyConnectedTester& FilterScale(float filter_scale) {
+    filter_scale_ = filter_scale;
+    return *this;
+  }
+
+  inline float FilterScale() const { return filter_scale_; }
+
+  inline QuantizedFullyConnectedTester& OutputScale(float output_scale) {
+    output_scale_ = output_scale;
+    return *this;
+  }
+
+  inline float OutputScale() const { return output_scale_; }
+
+  inline QuantizedFullyConnectedTester& KeepDims(bool keep_dims) {  // Maps to FullyConnectedOptions.keep_num_dims.
+    keep_dims_ = keep_dims;
+    return *this;
+  }
+
+  inline bool KeepDims() const { return keep_dims_; }
+
+  inline QuantizedFullyConnectedTester& ReluActivation() {
+    activation_ = ::tflite::ActivationFunctionType_RELU;
+    return *this;
+  }
+
+  inline QuantizedFullyConnectedTester& Relu6Activation() {
+    activation_ = ::tflite::ActivationFunctionType_RELU6;
+    return *this;
+  }
+
+  inline QuantizedFullyConnectedTester& ReluMinus1To1Activation() {
+    activation_ = ::tflite::ActivationFunctionType_RELU_N1_TO_1;
+    return *this;
+  }
+
+  void Test(TfLiteDelegate* delegate) const;  // Runs delegate vs reference and compares outputs.
+
+ private:
+  std::vector<char> CreateTfLiteModel() const;  // Serializes the model flatbuffer.
+
+  inline ::tflite::ActivationFunctionType Activation() const {
+    return activation_;
+  }
+
+  static int32_t ComputeSize(const std::vector<int32_t>& shape);
+
+  std::vector<int32_t> input_shape_;
+  int32_t input_size_ = 1;  // Product of input_shape_ dims, kept in sync by InputShape().
+  int32_t input_channels_ = 1;
+  int32_t output_channels_ = 1;
+  int8_t input_zero_point_ = 0;  // Default quantization parameters used unless overridden.
+  int8_t output_zero_point_ = 0;
+  float input_scale_ = 0.8f;
+  float filter_scale_ = 0.75f;
+  float output_scale_ = 1.5f;
+  bool keep_dims_ = false;
+  ::tflite::ActivationFunctionType activation_ =
+      ::tflite::ActivationFunctionType_NONE;
+};
+
+} // namespace xnnpack
+} // namespace tflite
+
+#endif // TENSORFLOW_LITE_DELEGATES_XNNPACK_QUANTIZED_FULLY_CONNECTED_TESTER_H_
diff --git a/tensorflow/lite/tools/cmake/modules/xnnpack.cmake b/tensorflow/lite/tools/cmake/modules/xnnpack.cmake
index bbeca1a..3405b64 100644
--- a/tensorflow/lite/tools/cmake/modules/xnnpack.cmake
+++ b/tensorflow/lite/tools/cmake/modules/xnnpack.cmake
@@ -22,7 +22,7 @@
OverridableFetchContent_Declare(
xnnpack
GIT_REPOSITORY https://github.com/google/XNNPACK
- GIT_TAG ec56b7ee14e4b186bfb95f96a69784c14fdea016
+ GIT_TAG 8f15372eb67ffab0d54cfe3752acaf8f8415af17
GIT_PROGRESS TRUE
PREFIX "${CMAKE_BINARY_DIR}"
SOURCE_DIR "${CMAKE_BINARY_DIR}/xnnpack"
diff --git a/tensorflow/workspace2.bzl b/tensorflow/workspace2.bzl
index 8c00240..4a2764e 100644
--- a/tensorflow/workspace2.bzl
+++ b/tensorflow/workspace2.bzl
@@ -127,11 +127,11 @@
# and update the sha256 with the result.
tf_http_archive(
name = "XNNPACK",
- sha256 = "5d35210ad504daa901a85fe0df4f699cdaf11428371192ef4cf5d9b2aca0549d",
- strip_prefix = "XNNPACK-ec56b7ee14e4b186bfb95f96a69784c14fdea016",
+ sha256 = "5482fb0fcdc1df8b4842f8edf944443ea67ffe712a5cd846f0af484abe4f9a79",
+ strip_prefix = "XNNPACK-8f15372eb67ffab0d54cfe3752acaf8f8415af17",
urls = [
- "https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/XNNPACK/archive/ec56b7ee14e4b186bfb95f96a69784c14fdea016.zip",
- "https://github.com/google/XNNPACK/archive/ec56b7ee14e4b186bfb95f96a69784c14fdea016.zip",
+ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/XNNPACK/archive/8f15372eb67ffab0d54cfe3752acaf8f8415af17.zip",
+ "https://github.com/google/XNNPACK/archive/8f15372eb67ffab0d54cfe3752acaf8f8415af17.zip",
],
)