Add build-time Bazel options for offloading quantized inference to XNNPACK
Add --define tflite_with_xnnpack_qs8={true|false} Bazel option to
enable/disable offloading of signed 8-bit quantized operators to XNNPACK and
--define tflite_with_xnnpack_qu8={true|false} Bazel option to enable/disable
offloading of unsigned 8-bit quantized operators to XNNPACK. Default to
enabling offloading of quantized inference in WebAssembly builds and disabling
it in all other builds (no change w.r.t. previous behaviour).
PiperOrigin-RevId: 428930152
Change-Id: Id68c688c5d015197c70b64bac846ad67c92d1537
diff --git a/tensorflow/lite/BUILD b/tensorflow/lite/BUILD
index 4f2f7eb..982191b 100644
--- a/tensorflow/lite/BUILD
+++ b/tensorflow/lite/BUILD
@@ -443,28 +443,7 @@
"model_builder.h",
],
compatible_with = get_compatible_with_portable(),
- copts = tflite_copts() + tflite_copts_warnings() +
- # As xnn_enable_qs8_explicit_true and xnn_enable_qu8_explicit_true
- # could be specified simultaneously, use two selects here.
- select({
- "@XNNPACK//:xnn_enable_qs8_explicit_false": [],
- "@XNNPACK//:xnn_enable_qs8_explicit_true": [
- "-DTFLITE_ALWAYS_CREATE_LAZY_DELEGATE_PROVIDERS",
- ],
- "//tensorflow:emscripten": [
- "-DTFLITE_ALWAYS_CREATE_LAZY_DELEGATE_PROVIDERS",
- ],
- "//conditions:default": [],
- }) + select({
- "@XNNPACK//:xnn_enable_qu8_explicit_false": [],
- "@XNNPACK//:xnn_enable_qu8_explicit_true": [
- "-DTFLITE_ALWAYS_CREATE_LAZY_DELEGATE_PROVIDERS",
- ],
- "//tensorflow:emscripten": [
- "-DTFLITE_ALWAYS_CREATE_LAZY_DELEGATE_PROVIDERS",
- ],
- "//conditions:default": [],
- }),
+ copts = tflite_copts() + tflite_copts_warnings(),
visibility = [
"//tensorflow/lite/core/shims:__subpackages__",
"//tensorflow/lite/delegates/flex:__subpackages__",
@@ -493,6 +472,8 @@
"//tensorflow/lite/core/api",
"//tensorflow/lite/core/api:verifier",
"//tensorflow/lite/delegates:telemetry",
+ "//tensorflow/lite/delegates/xnnpack:tflite_with_xnnpack_qs8",
+ "//tensorflow/lite/delegates/xnnpack:tflite_with_xnnpack_qu8",
"//tensorflow/lite/experimental/resource",
"//tensorflow/lite/internal:signature_def",
"//tensorflow/lite/kernels/internal:compatibility",
diff --git a/tensorflow/lite/delegates/xnnpack/BUILD b/tensorflow/lite/delegates/xnnpack/BUILD
index 5080ffe..8c43bcf 100644
--- a/tensorflow/lite/delegates/xnnpack/BUILD
+++ b/tensorflow/lite/delegates/xnnpack/BUILD
@@ -26,18 +26,98 @@
define_values = {"xnnpack_force_float_precision": "fp16"},
)
+# Enable offloading of quantized 8-bit signed operators to XNNPACK delegate
+config_setting(
+ name = "tflite_with_xnnpack_qs8_explicit_true",
+ define_values = {"tflite_with_xnnpack_qs8": "true"},
+)
+
+# Disable offloading of quantized 8-bit signed operators to XNNPACK delegate
+config_setting(
+ name = "tflite_with_xnnpack_qs8_explicit_false",
+ define_values = {"tflite_with_xnnpack_qs8": "false"},
+)
+
+# Default setting for offloading of quantized 8-bit signed operators to XNNPACK delegate
+cc_library(
+ name = "tflite_with_xnnpack_qs8_default",
+ compatible_with = get_compatible_with_portable(),
+ defines = select({
+ # Enable for WebAssembly builds
+ "//tensorflow:emscripten": ["XNNPACK_DELEGATE_ENABLE_QS8=1"],
+ # Disable for all other builds
+ "//conditions:default": [],
+ }),
+)
+
+cc_library(
+ name = "tflite_with_xnnpack_qs8",
+ compatible_with = get_compatible_with_portable(),
+ defines = select({
+ ":tflite_with_xnnpack_qs8_explicit_true": ["XNNPACK_DELEGATE_ENABLE_QS8=1"],
+ ":tflite_with_xnnpack_qs8_explicit_false": [],
+ "//conditions:default": [],
+ }),
+ deps = select({
+ ":tflite_with_xnnpack_qs8_explicit_true": [],
+ ":tflite_with_xnnpack_qs8_explicit_false": [],
+ "//conditions:default": [":tflite_with_xnnpack_qs8_default"],
+ }),
+)
+
+# Enable offloading of quantized 8-bit unsigned operators to XNNPACK delegate
+config_setting(
+ name = "tflite_with_xnnpack_qu8_explicit_true",
+ define_values = {"tflite_with_xnnpack_qu8": "true"},
+)
+
+# Disable offloading of quantized 8-bit unsigned operators to XNNPACK delegate
+config_setting(
+ name = "tflite_with_xnnpack_qu8_explicit_false",
+ define_values = {"tflite_with_xnnpack_qu8": "false"},
+)
+
+# Default setting for offloading of quantized 8-bit unsigned operators to XNNPACK delegate
+cc_library(
+ name = "tflite_with_xnnpack_qu8_default",
+ compatible_with = get_compatible_with_portable(),
+ defines = select({
+ # Enable for WebAssembly builds
+ "//tensorflow:emscripten": ["XNNPACK_DELEGATE_ENABLE_QU8=1"],
+ # Disable for all other builds
+ "//conditions:default": [],
+ }),
+)
+
+cc_library(
+ name = "tflite_with_xnnpack_qu8",
+ compatible_with = get_compatible_with_portable(),
+ defines = select({
+ ":tflite_with_xnnpack_qu8_explicit_true": ["XNNPACK_DELEGATE_ENABLE_QU8=1"],
+ ":tflite_with_xnnpack_qu8_explicit_false": [],
+ "//conditions:default": [],
+ }),
+ deps = select({
+ ":tflite_with_xnnpack_qu8_explicit_true": [],
+ ":tflite_with_xnnpack_qu8_explicit_false": [],
+ "//conditions:default": [":tflite_with_xnnpack_qu8_default"],
+ }),
+)
+
cc_library(
name = "xnnpack_delegate",
srcs = ["xnnpack_delegate.cc"],
hdrs = ["xnnpack_delegate.h"],
compatible_with = get_compatible_with_portable(),
copts = tflite_copts() + select({
- ":xnnpack_force_float_precision_explicit_fp16": ["-DXNNPACK_FORCE_PRECISION_FP16"],
+        ":xnnpack_force_float_precision_explicit_fp16": ["-DXNNPACK_DELEGATE_FORCE_PRECISION_FP16=1"],
"//conditions:default": [],
}),
linkstatic = True,
deps = [
":quantization_util",
+ ":tflite_with_xnnpack_qs8",
+ ":tflite_with_xnnpack_qu8",
"//tensorflow/lite:kernel_api",
"//tensorflow/lite:minimal_logging",
"//tensorflow/lite/c:common",
diff --git a/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.cc b/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.cc
index dff0642..f74cb00 100644
--- a/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.cc
+++ b/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.cc
@@ -88,7 +88,7 @@
}
bool force_fp16() const {
-#ifdef XNNPACK_FORCE_PRECISION_FP16
+#ifdef XNNPACK_DELEGATE_FORCE_PRECISION_FP16
return true;
#else
return (options_.flags & TFLITE_XNNPACK_DELEGATE_FLAG_FORCE_FP16) != 0;
@@ -4429,8 +4429,10 @@
TfLiteXNNPackDelegateOptions options = {0};
// Quantized inference is enabled by default on Web platform
-#ifdef __EMSCRIPTEN__
+#ifdef XNNPACK_DELEGATE_ENABLE_QS8
options.flags |= TFLITE_XNNPACK_DELEGATE_FLAG_QS8;
+#endif
+#ifdef XNNPACK_DELEGATE_ENABLE_QU8
options.flags |= TFLITE_XNNPACK_DELEGATE_FLAG_QU8;
#endif
diff --git a/tensorflow/lite/interpreter_builder.cc b/tensorflow/lite/interpreter_builder.cc
index bf744b1..18767e1 100644
--- a/tensorflow/lite/interpreter_builder.cc
+++ b/tensorflow/lite/interpreter_builder.cc
@@ -144,7 +144,7 @@
}
inline bool ShouldCreateLazyDelegateProviders(int num_fp32_tensors) {
-#ifdef TFLITE_ALWAYS_CREATE_LAZY_DELEGATE_PROVIDERS
+#if defined(XNNPACK_DELEGATE_ENABLE_QS8) || defined(XNNPACK_DELEGATE_ENABLE_QU8)
return true;
#else
return num_fp32_tensors > 0;