Add build-time Bazel options for offloading quantized inference to XNNPACK

Add a --define tflite_with_xnnpack_qs8={true|false} Bazel option to
enable/disable offloading of signed 8-bit quantized operators to XNNPACK,
and a --define tflite_with_xnnpack_qu8={true|false} Bazel option to
enable/disable offloading of unsigned 8-bit quantized operators to XNNPACK.
By default, offloading of quantized inference is enabled in WebAssembly
builds and disabled in all other builds (no change w.r.t. previous
behaviour).
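
For example, to force-enable both kinds of offloading (the build target
below is illustrative; substitute whichever TFLite target your build
actually uses):

  bazel build \
      --define tflite_with_xnnpack_qs8=true \
      --define tflite_with_xnnpack_qu8=true \
      //tensorflow/lite:framework

Passing false instead force-disables offloading even in WebAssembly
builds; omitting a define keeps the platform default.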

PiperOrigin-RevId: 428930152
Change-Id: Id68c688c5d015197c70b64bac846ad67c92d1537
diff --git a/tensorflow/lite/BUILD b/tensorflow/lite/BUILD
index 4f2f7eb..982191b 100644
--- a/tensorflow/lite/BUILD
+++ b/tensorflow/lite/BUILD
@@ -443,28 +443,7 @@
         "model_builder.h",
     ],
     compatible_with = get_compatible_with_portable(),
-    copts = tflite_copts() + tflite_copts_warnings() +
-            # As xnn_enable_qs8_explicit_true and xnn_enable_qu8_explicit_true
-            # could be specified simultaneously, use two selects here.
-            select({
-                "@XNNPACK//:xnn_enable_qs8_explicit_false": [],
-                "@XNNPACK//:xnn_enable_qs8_explicit_true": [
-                    "-DTFLITE_ALWAYS_CREATE_LAZY_DELEGATE_PROVIDERS",
-                ],
-                "//tensorflow:emscripten": [
-                    "-DTFLITE_ALWAYS_CREATE_LAZY_DELEGATE_PROVIDERS",
-                ],
-                "//conditions:default": [],
-            }) + select({
-        "@XNNPACK//:xnn_enable_qu8_explicit_false": [],
-        "@XNNPACK//:xnn_enable_qu8_explicit_true": [
-            "-DTFLITE_ALWAYS_CREATE_LAZY_DELEGATE_PROVIDERS",
-        ],
-        "//tensorflow:emscripten": [
-            "-DTFLITE_ALWAYS_CREATE_LAZY_DELEGATE_PROVIDERS",
-        ],
-        "//conditions:default": [],
-    }),
+    copts = tflite_copts() + tflite_copts_warnings(),
     visibility = [
         "//tensorflow/lite/core/shims:__subpackages__",
         "//tensorflow/lite/delegates/flex:__subpackages__",
@@ -493,6 +472,8 @@
         "//tensorflow/lite/core/api",
         "//tensorflow/lite/core/api:verifier",
         "//tensorflow/lite/delegates:telemetry",
+        "//tensorflow/lite/delegates/xnnpack:tflite_with_xnnpack_qs8",
+        "//tensorflow/lite/delegates/xnnpack:tflite_with_xnnpack_qu8",
         "//tensorflow/lite/experimental/resource",
         "//tensorflow/lite/internal:signature_def",
         "//tensorflow/lite/kernels/internal:compatibility",
diff --git a/tensorflow/lite/delegates/xnnpack/BUILD b/tensorflow/lite/delegates/xnnpack/BUILD
index 5080ffe..8c43bcf 100644
--- a/tensorflow/lite/delegates/xnnpack/BUILD
+++ b/tensorflow/lite/delegates/xnnpack/BUILD
@@ -26,18 +26,98 @@
     define_values = {"xnnpack_force_float_precision": "fp16"},
 )
 
+# Enable offloading of quantized 8-bit signed operators to XNNPACK delegate
+config_setting(
+    name = "tflite_with_xnnpack_qs8_explicit_true",
+    define_values = {"tflite_with_xnnpack_qs8": "true"},
+)
+
+# Disable offloading of quantized 8-bit signed operators to XNNPACK delegate
+config_setting(
+    name = "tflite_with_xnnpack_qs8_explicit_false",
+    define_values = {"tflite_with_xnnpack_qs8": "false"},
+)
+
+# Default setting for offloading of quantized 8-bit signed operators to XNNPACK delegate
+cc_library(
+    name = "tflite_with_xnnpack_qs8_default",
+    compatible_with = get_compatible_with_portable(),
+    defines = select({
+        # Enable for WebAssembly builds
+        "//tensorflow:emscripten": ["XNNPACK_DELEGATE_ENABLE_QS8=1"],
+        # Disable for all other builds
+        "//conditions:default": [],
+    }),
+)
+
+cc_library(
+    name = "tflite_with_xnnpack_qs8",
+    compatible_with = get_compatible_with_portable(),
+    defines = select({
+        ":tflite_with_xnnpack_qs8_explicit_true": ["XNNPACK_DELEGATE_ENABLE_QS8=1"],
+        ":tflite_with_xnnpack_qs8_explicit_false": [],
+        "//conditions:default": [],
+    }),
+    deps = select({
+        ":tflite_with_xnnpack_qs8_explicit_true": [],
+        ":tflite_with_xnnpack_qs8_explicit_false": [],
+        "//conditions:default": [":tflite_with_xnnpack_qs8_default"],
+    }),
+)
+
+# Enable offloading of quantized 8-bit unsigned operators to XNNPACK delegate
+config_setting(
+    name = "tflite_with_xnnpack_qu8_explicit_true",
+    define_values = {"tflite_with_xnnpack_qu8": "true"},
+)
+
+# Disable offloading of quantized 8-bit unsigned operators to XNNPACK delegate
+config_setting(
+    name = "tflite_with_xnnpack_qu8_explicit_false",
+    define_values = {"tflite_with_xnnpack_qu8": "false"},
+)
+
+# Default setting for offloading of quantized 8-bit unsigned operators to XNNPACK delegate
+cc_library(
+    name = "tflite_with_xnnpack_qu8_default",
+    compatible_with = get_compatible_with_portable(),
+    defines = select({
+        # Enable for WebAssembly builds
+        "//tensorflow:emscripten": ["XNNPACK_DELEGATE_ENABLE_QU8=1"],
+        # Disable for all other builds
+        "//conditions:default": [],
+    }),
+)
+
+cc_library(
+    name = "tflite_with_xnnpack_qu8",
+    compatible_with = get_compatible_with_portable(),
+    defines = select({
+        ":tflite_with_xnnpack_qu8_explicit_true": ["XNNPACK_DELEGATE_ENABLE_QU8=1"],
+        ":tflite_with_xnnpack_qu8_explicit_false": [],
+        "//conditions:default": [],
+    }),
+    deps = select({
+        ":tflite_with_xnnpack_qu8_explicit_true": [],
+        ":tflite_with_xnnpack_qu8_explicit_false": [],
+        "//conditions:default": [":tflite_with_xnnpack_qu8_default"],
+    }),
+)
+
 cc_library(
     name = "xnnpack_delegate",
     srcs = ["xnnpack_delegate.cc"],
     hdrs = ["xnnpack_delegate.h"],
     compatible_with = get_compatible_with_portable(),
     copts = tflite_copts() + select({
-        ":xnnpack_force_float_precision_explicit_fp16": ["-DXNNPACK_FORCE_PRECISION_FP16"],
+        ":xnnpack_force_float_precision_explicit_fp16": ["XNNPACK_DELEGATE_FORCE_PRECISION_FP16=1"],
         "//conditions:default": [],
     }),
     linkstatic = True,
     deps = [
         ":quantization_util",
+        ":tflite_with_xnnpack_qs8",
+        ":tflite_with_xnnpack_qu8",
         "//tensorflow/lite:kernel_api",
         "//tensorflow/lite:minimal_logging",
         "//tensorflow/lite/c:common",
diff --git a/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.cc b/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.cc
index dff0642..f74cb00 100644
--- a/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.cc
+++ b/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.cc
@@ -88,7 +88,7 @@
   }
 
   bool force_fp16() const {
-#ifdef XNNPACK_FORCE_PRECISION_FP16
+#ifdef XNNPACK_DELEGATE_FORCE_PRECISION_FP16
     return true;
 #else
     return (options_.flags & TFLITE_XNNPACK_DELEGATE_FLAG_FORCE_FP16) != 0;
@@ -4429,8 +4429,10 @@
   TfLiteXNNPackDelegateOptions options = {0};
 
   // Quantized inference is enabled by default on Web platform
-#ifdef __EMSCRIPTEN__
+#ifdef XNNPACK_DELEGATE_ENABLE_QS8
   options.flags |= TFLITE_XNNPACK_DELEGATE_FLAG_QS8;
+#endif
+#ifdef XNNPACK_DELEGATE_ENABLE_QU8
   options.flags |= TFLITE_XNNPACK_DELEGATE_FLAG_QU8;
 #endif
 
diff --git a/tensorflow/lite/interpreter_builder.cc b/tensorflow/lite/interpreter_builder.cc
index bf744b1..18767e1 100644
--- a/tensorflow/lite/interpreter_builder.cc
+++ b/tensorflow/lite/interpreter_builder.cc
@@ -144,7 +144,7 @@
 }
 
 inline bool ShouldCreateLazyDelegateProviders(int num_fp32_tensors) {
-#ifdef TFLITE_ALWAYS_CREATE_LAZY_DELEGATE_PROVIDERS
+#if defined(XNNPACK_DELEGATE_ENABLE_QS8) || defined(XNNPACK_DELEGATE_ENABLE_QU8)
   return true;
 #else
   return num_fp32_tensors > 0;