Roll back XNNPack delegate support for delegating grouped conv2d ops: removes the Groups()/KernelInputChannels() tester API, the Grouped tests, and the groups computation in xnnpack_delegate.cc (convolutions are defined with groups=1 again).
PiperOrigin-RevId: 436813859
diff --git a/tensorflow/lite/delegates/xnnpack/conv_2d_test.cc b/tensorflow/lite/delegates/xnnpack/conv_2d_test.cc
index f18e7f8..837e102 100644
--- a/tensorflow/lite/delegates/xnnpack/conv_2d_test.cc
+++ b/tensorflow/lite/delegates/xnnpack/conv_2d_test.cc
@@ -105,36 +105,6 @@
.Test(xnnpack_delegate.get());
}
-TEST(Conv2D, Grouped) {
- std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
- xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
- TfLiteXNNPackDelegateDelete);
-
- std::random_device random_device;
- auto rng = std::mt19937(random_device());
- auto batch_rng =
- std::bind(std::uniform_int_distribution<int32_t>(2, 4), std::ref(rng));
- auto input_rng =
- std::bind(std::uniform_int_distribution<int32_t>(5, 25), std::ref(rng));
- auto channel_per_group_rng =
- std::bind(std::uniform_int_distribution<int32_t>(2, 16), std::ref(rng));
- auto groups_rng =
- std::bind(std::uniform_int_distribution<int32_t>(2, 8), std::ref(rng));
-
- auto groups = groups_rng();
- Conv2DTester()
- .BatchSize(batch_rng())
- .InputHeight(input_rng())
- .InputWidth(input_rng())
- .InputChannels(groups * channel_per_group_rng())
- .OutputChannels(groups * channel_per_group_rng())
- .Groups(groups)
- .KernelHeight(3)
- .KernelWidth(3)
- .SamePadding()
- .Test(xnnpack_delegate.get());
-}
-
TEST(Conv2D, SmallKernelWithSamePadding) {
std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
diff --git a/tensorflow/lite/delegates/xnnpack/conv_2d_tester.cc b/tensorflow/lite/delegates/xnnpack/conv_2d_tester.cc
index 082a27c..222fdac 100644
--- a/tensorflow/lite/delegates/xnnpack/conv_2d_tester.cc
+++ b/tensorflow/lite/delegates/xnnpack/conv_2d_tester.cc
@@ -140,8 +140,8 @@
densify_filter_outputs.size())));
}
- const std::vector<int32_t> filter_shape = {
- OutputChannels(), KernelHeight(), KernelWidth(), KernelInputChannels()};
+ const std::vector<int32_t> filter_shape = {OutputChannels(), KernelHeight(),
+ KernelWidth(), InputChannels()};
const std::vector<int32_t> bias_shape = {OutputChannels()};
std::vector<float> filter_scales;
std::vector<int64_t> filter_zero_points;
@@ -151,7 +151,7 @@
CreateOperatorCode(builder, BuiltinOperator_DEQUANTIZE));
std::vector<uint16_t> filter_data(OutputChannels() * KernelHeight() *
- KernelWidth() * KernelInputChannels());
+ KernelWidth() * InputChannels());
std::vector<uint16_t> bias_data(OutputChannels());
for (int32_t oc = 0; oc < OutputChannels(); oc++) {
// Use the same range of all-positive or all-negative values to generate
@@ -165,12 +165,12 @@
std::min(range, 0.0f), std::max(range, 0.0f)),
std::ref(rng)));
bias_data[oc] = value_rng();
- for (int32_t ic = 0; ic < KernelInputChannels(); ic++) {
+ for (int32_t ic = 0; ic < InputChannels(); ic++) {
for (int32_t y = 0; y < KernelHeight(); y++) {
for (int32_t x = 0; x < KernelWidth(); x++) {
const int32_t index =
((oc * KernelHeight() + y) * KernelWidth() + x) *
- KernelInputChannels() +
+ InputChannels() +
ic;
filter_data[index] = value_rng();
}
@@ -209,7 +209,7 @@
dequantize_bias_outputs.size())));
} else {
std::vector<float> filter_data(OutputChannels() * KernelHeight() *
- KernelWidth() * KernelInputChannels());
+ KernelWidth() * InputChannels());
std::vector<float> bias_data(OutputChannels());
for (int32_t oc = 0; oc < OutputChannels(); oc++) {
// Use the same range of all-positive or all-negative values to generate
@@ -222,12 +222,12 @@
std::min(range, 0.0f), std::max(range, 0.0f)),
std::ref(rng));
bias_data[oc] = value_rng();
- for (int32_t ic = 0; ic < KernelInputChannels(); ic++) {
+ for (int32_t ic = 0; ic < InputChannels(); ic++) {
for (int32_t y = 0; y < KernelHeight(); y++) {
for (int32_t x = 0; x < KernelWidth(); x++) {
const int32_t index =
((oc * KernelHeight() + y) * KernelWidth() + x) *
- KernelInputChannels() +
+ InputChannels() +
ic;
filter_data[index] = value_rng();
}
diff --git a/tensorflow/lite/delegates/xnnpack/conv_2d_tester.h b/tensorflow/lite/delegates/xnnpack/conv_2d_tester.h
index 16144a2..a0034db 100644
--- a/tensorflow/lite/delegates/xnnpack/conv_2d_tester.h
+++ b/tensorflow/lite/delegates/xnnpack/conv_2d_tester.h
@@ -56,19 +56,6 @@
inline int32_t OutputChannels() const { return output_channels_; }
- inline Conv2DTester& Groups(int32_t groups) {
- EXPECT_EQ(InputChannels() % groups, 0);
- EXPECT_EQ(OutputChannels() % groups, 0);
- groups_ = groups;
- return *this;
- }
-
- inline int32_t Groups() const { return groups_; }
-
- inline int32_t KernelInputChannels() const {
- return input_channels_ / groups_;
- }
-
inline Conv2DTester& InputHeight(int32_t input_height) {
EXPECT_GT(input_height, 0);
input_height_ = input_height;
@@ -240,7 +227,6 @@
int32_t batch_size_ = 1;
int32_t input_channels_ = 1;
int32_t output_channels_ = 1;
- int32_t groups_ = 1;
int32_t input_height_ = 1;
int32_t input_width_ = 1;
int32_t kernel_height_ = 1;
diff --git a/tensorflow/lite/delegates/xnnpack/quantized_conv_2d_tester.cc b/tensorflow/lite/delegates/xnnpack/quantized_conv_2d_tester.cc
index 8d4faf7..1ae2119 100644
--- a/tensorflow/lite/delegates/xnnpack/quantized_conv_2d_tester.cc
+++ b/tensorflow/lite/delegates/xnnpack/quantized_conv_2d_tester.cc
@@ -138,7 +138,7 @@
{CreateOperatorCode(builder, BuiltinOperator_CONV_2D)}};
std::vector<int8_t> filter_data(OutputChannels() * KernelHeight() *
- KernelWidth() * KernelInputChannels());
+ KernelWidth() * InputChannels());
std::generate(filter_data.begin(), filter_data.end(), std::ref(filter_rng));
std::vector<int32_t> bias_data(OutputChannels());
std::generate(bias_data.begin(), bias_data.end(), std::ref(bias_rng));
@@ -160,7 +160,7 @@
const std::array<int32_t, 4> output_shape{
{BatchSize(), OutputHeight(), OutputWidth(), OutputChannels()}};
const std::array<int32_t, 4> filter_shape{
- {OutputChannels(), KernelHeight(), KernelWidth(), KernelInputChannels()}};
+ {OutputChannels(), KernelHeight(), KernelWidth(), InputChannels()}};
const std::array<int32_t, 1> bias_shape{{OutputChannels()}};
flatbuffers::Offset<flatbuffers::Vector<float>> filter_scale_offset = 0;
diff --git a/tensorflow/lite/delegates/xnnpack/quantized_conv_2d_tester.h b/tensorflow/lite/delegates/xnnpack/quantized_conv_2d_tester.h
index 8200fd2..9f6215d 100644
--- a/tensorflow/lite/delegates/xnnpack/quantized_conv_2d_tester.h
+++ b/tensorflow/lite/delegates/xnnpack/quantized_conv_2d_tester.h
@@ -152,19 +152,6 @@
return (KernelWidth() - 1) * DilationWidth() + 1;
}
- inline QuantizedConv2DTester& Groups(int32_t groups) {
- EXPECT_EQ(InputChannels() % groups, 0);
- EXPECT_EQ(OutputChannels() % groups, 0);
- groups_ = groups;
- return *this;
- }
-
- inline int32_t Groups() const { return groups_; }
-
- inline int32_t KernelInputChannels() const {
- return input_channels_ / groups_;
- }
-
inline QuantizedConv2DTester& InputZeroPoint(int32_t input_zero_point) {
input_zero_point_ = input_zero_point;
return *this;
@@ -269,7 +256,6 @@
int32_t batch_size_ = 1;
int32_t input_channels_ = 1;
int32_t output_channels_ = 1;
- int32_t groups_ = 1;
int32_t input_height_ = 1;
int32_t input_width_ = 1;
int32_t kernel_height_ = 1;
diff --git a/tensorflow/lite/delegates/xnnpack/signed_quantized_conv_2d_test.cc b/tensorflow/lite/delegates/xnnpack/signed_quantized_conv_2d_test.cc
index 20740cd..26377db 100644
--- a/tensorflow/lite/delegates/xnnpack/signed_quantized_conv_2d_test.cc
+++ b/tensorflow/lite/delegates/xnnpack/signed_quantized_conv_2d_test.cc
@@ -124,44 +124,6 @@
.Test(xnnpack_delegate.get());
}
-TEST(SignedQuantizedConv2D, Grouped) {
- std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
- xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
- TfLiteXNNPackDelegateDelete);
-
- std::random_device random_device;
- auto rng = std::mt19937(random_device());
- auto zero_point_rng = std::bind(std::uniform_int_distribution<int32_t>(
- std::numeric_limits<int8_t>::min(),
- std::numeric_limits<int8_t>::max()),
- std::ref(rng));
- auto batch_rng =
- std::bind(std::uniform_int_distribution<int32_t>(2, 4), std::ref(rng));
- auto input_rng =
- std::bind(std::uniform_int_distribution<int32_t>(5, 25), std::ref(rng));
- auto channel_per_group_rng =
- std::bind(std::uniform_int_distribution<int32_t>(2, 16), std::ref(rng));
- auto groups_rng =
- std::bind(std::uniform_int_distribution<int32_t>(2, 8), std::ref(rng));
-
- auto groups = groups_rng();
- QuantizedConv2DTester()
- .InputZeroPoint(zero_point_rng())
- .OutputZeroPoint(zero_point_rng())
- .BatchSize(batch_rng())
- .InputHeight(input_rng())
- .InputWidth(input_rng())
- .InputChannels(groups * channel_per_group_rng())
- .OutputChannels(groups * channel_per_group_rng())
- .Groups(groups)
- .KernelHeight(3)
- .KernelWidth(3)
- .StrideHeight(2)
- .StrideWidth(2)
- .SamePadding()
- .Test(xnnpack_delegate.get());
-}
-
TEST(SignedQuantizedConv2D, SmallKernelWithSamePadding) {
std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
diff --git a/tensorflow/lite/delegates/xnnpack/unsigned_quantized_conv_2d_test.cc b/tensorflow/lite/delegates/xnnpack/unsigned_quantized_conv_2d_test.cc
index 19e6ea1..87d2e16 100644
--- a/tensorflow/lite/delegates/xnnpack/unsigned_quantized_conv_2d_test.cc
+++ b/tensorflow/lite/delegates/xnnpack/unsigned_quantized_conv_2d_test.cc
@@ -113,47 +113,6 @@
std::bind(std::uniform_int_distribution<int32_t>(2, 4), std::ref(rng));
auto input_rng =
std::bind(std::uniform_int_distribution<int32_t>(5, 25), std::ref(rng));
- auto channel_per_group_rng =
- std::bind(std::uniform_int_distribution<int32_t>(2, 16), std::ref(rng));
- auto groups_rng =
- std::bind(std::uniform_int_distribution<int32_t>(2, 8), std::ref(rng));
-
- auto groups = groups_rng();
- QuantizedConv2DTester()
- .InputZeroPoint(zero_point_rng())
- .OutputZeroPoint(zero_point_rng())
- .KernelZeroPoint(kernel_zero_point_rng())
- .BatchSize(batch_rng())
- .InputHeight(input_rng())
- .InputWidth(input_rng())
- .InputChannels(groups * channel_per_group_rng())
- .OutputChannels(groups * channel_per_group_rng())
- .Groups(groups)
- .KernelHeight(3)
- .KernelWidth(3)
- .StrideHeight(2)
- .StrideWidth(2)
- .SamePadding()
- .Test(xnnpack_delegate.get());
-}
-
-TEST(UnsignedQuantizedConv2D, Grouped) {
- std::unique_ptr<TfLiteDelegate, decltype(&TfLiteXNNPackDelegateDelete)>
- xnnpack_delegate(TfLiteXNNPackDelegateCreate(nullptr),
- TfLiteXNNPackDelegateDelete);
-
- std::random_device random_device;
- auto rng = std::mt19937(random_device());
- auto zero_point_rng = std::bind(std::uniform_int_distribution<int32_t>(
- std::numeric_limits<uint8_t>::min(),
- std::numeric_limits<uint8_t>::max()),
- std::ref(rng));
- auto kernel_zero_point_rng = std::bind(
- std::uniform_int_distribution<int32_t>(100, 150), std::ref(rng));
- auto batch_rng =
- std::bind(std::uniform_int_distribution<int32_t>(2, 4), std::ref(rng));
- auto input_rng =
- std::bind(std::uniform_int_distribution<int32_t>(5, 25), std::ref(rng));
auto channel_rng =
std::bind(std::uniform_int_distribution<int32_t>(2, 16), std::ref(rng));
diff --git a/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.cc b/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.cc
index ef80445..c8abc64 100644
--- a/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.cc
+++ b/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.cc
@@ -2321,7 +2321,6 @@
const int kernel_height = SizeOfDimension(&filter_tensor, 1);
const int kernel_width = SizeOfDimension(&filter_tensor, 2);
const int input_channels = SizeOfDimension(&filter_tensor, 3);
- const int groups = SizeOfDimension(&input_tensor, 3) / input_channels;
uint32_t flags;
TF_LITE_ENSURE_STATUS(CalculatePadding(
@@ -2344,9 +2343,9 @@
static_cast<uint32_t>(conv_params->stride_height),
static_cast<uint32_t>(conv_params->stride_width),
static_cast<uint32_t>(conv_params->dilation_height_factor),
- static_cast<uint32_t>(conv_params->dilation_width_factor), groups,
- static_cast<size_t>(input_channels),
- static_cast<size_t>(output_channels) / groups, output_min, output_max,
+ static_cast<uint32_t>(conv_params->dilation_width_factor),
+ /*groups=*/1, static_cast<size_t>(input_channels),
+ static_cast<size_t>(output_channels), output_min, output_max,
/*input_id=*/xnnpack_tensors[node->inputs->data[0]],
/*filter_id=*/xnnpack_tensors[node->inputs->data[1]],
/*bias_id=*/xnnpack_tensors[node->inputs->data[2]],