Using ConvTexture for Buffer intermediate tensors.

PiperOrigin-RevId: 273404675

commit: f7ed788a09a17ce1247c6a3d983c08880c932aa9 [log] [tgz]
author: A. Unique TensorFlower <gardener@tensorflow.org> Mon Oct 07 16:36:50 2019 -0700
committer: TensorFlower Gardener <gardener@tensorflow.org> Mon Oct 07 21:03:24 2019 -0700
tree: 5df8ef05bb0c3b31930d9ee0ab7cd457fadd6847
parent: 2b060e1a0d44a2d29ca564375ba3a28c5d312852 [diff]
diff --git a/tensorflow/lite/delegates/gpu/cl/environment.cc b/tensorflow/lite/delegates/gpu/cl/environment.cc
index 7677e06..01e3912 100644
--- a/tensorflow/lite/delegates/gpu/cl/environment.cc
+++ b/tensorflow/lite/delegates/gpu/cl/environment.cc

@@ -169,19 +169,6 @@
   }
 }
 
-std::vector<TensorStorageType> Environment::GetSupportedTextureStorages()
-    const {
-  std::vector<TensorStorageType> storage_types = {
-      TensorStorageType::TEXTURE_2D};
-  if (device_.SupportsTextureArray()) {
-    storage_types.push_back(TensorStorageType::TEXTURE_ARRAY);
-  }
-  if (device_.IsAdreno() && device_.SupportsImageBuffer()) {
-    storage_types.push_back(TensorStorageType::IMAGE_BUFFER);
-  }
-  return storage_types;
-}
-
 std::vector<TensorStorageType> Environment::GetSupportedStorages() const {
   std::vector<TensorStorageType> storage_types = {TensorStorageType::TEXTURE_2D,
                                                   TensorStorageType::BUFFER};

diff --git a/tensorflow/lite/delegates/gpu/cl/environment.h b/tensorflow/lite/delegates/gpu/cl/environment.h
index 80df6a9..8492953 100644
--- a/tensorflow/lite/delegates/gpu/cl/environment.h
+++ b/tensorflow/lite/delegates/gpu/cl/environment.h

@@ -55,7 +55,6 @@
 
   std::vector<CalculationsPrecision> GetSupportedPrecisions() const;
   bool IsSupported(CalculationsPrecision precision) const;
-  std::vector<TensorStorageType> GetSupportedTextureStorages() const;
   std::vector<TensorStorageType> GetSupportedStorages() const;
 
   void SetHighPerformance() const;

diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.cc
index b9c4fe8..e4d5987 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.cc

@@ -42,8 +42,9 @@
                                  {"dst_size.x", "dst_size.y", "dst_size.z"},
                                  op_def.dst_tensors[0]);
 
-  const bool is_image_buffer =
-      op_def.src_tensors[0].storage_type == TensorStorageType::IMAGE_BUFFER;
+  const auto src_tensor_type = op_def.src_tensors[0].storage_type;
+  const bool is_buffer = src_tensor_type == TensorStorageType::IMAGE_BUFFER ||
+                         src_tensor_type == TensorStorageType::BUFFER;
 
   std::vector<std::string> xs(block_size.x);
   for (int x = 0; x < block_size.x; ++x) {
@@ -139,36 +140,53 @@
     for (int y = 0; y < block_size.y; ++y) {
       c += "  cy" + ys[y] + " = y * dilation.y + yc" + ys[y] + ";\n";
     }
-    if (is_image_buffer) {
+    if (is_buffer) {
       for (int y = 0; y < block_size.y; ++y) {
         c += "  bool in_y" + ys[y] + " = cy" + ys[y] + " >= 0 && cy" + ys[y] +
              " < src_size.y;\n";
+        if (src_tensor_type == TensorStorageType::BUFFER) {
+          c += "    cy" + ys[y] + " = clamp(cy" + ys[y] +
+               ", 0, src_size.y - 1);\n";
+        }
       }
     }
     c += "  for (int x = 0; x < kernel_size.x; ++x) {\n";
     for (int x = 0; x < block_size.x; ++x) {
       c += "  cx" + xs[x] + " = x * dilation.x + xc" + xs[x] + ";\n";
     }
-    if (is_image_buffer) {
+    if (is_buffer) {
       for (int x = 0; x < block_size.x; ++x) {
         c += "  bool in_x" + xs[x] + " = cx" + xs[x] + " >= 0 && cx" + xs[x] +
              " < src_size.x;\n";
+        if (src_tensor_type == TensorStorageType::BUFFER) {
+          c += "    cx" + xs[x] + " = clamp(cx" + xs[x] +
+               ", 0, src_size.x - 1);\n";
+        }
       }
       for (int x = 0; x < block_size.x; ++x) {
         for (int y = 0; y < block_size.y; ++y) {
           const std::string id = std::to_string(y * block_size.x + x);
-          c += absl::Substitute(
-              "  int addr_$0 = select(-1, cy$2 * src_size.x + cx$1, (in_x$1 && "
-              "in_y$2));\n",
-              y * block_size.x + x, x, y);
-          c += absl::Substitute(
-              "  int dz_$0 = select(0, src_size.x * src_size.y, (in_x$1 && "
-              "in_y$2));\n",
-              y * block_size.x + x, x, y);
+          if (src_tensor_type == TensorStorageType::IMAGE_BUFFER) {
+            c += absl::Substitute(
+                "  int addr_$0 = select(-1, cy$2 * src_size.x + cx$1, (in_x$1 "
+                "&& "
+                "in_y$2));\n",
+                y * block_size.x + x, x, y);
+            c += absl::Substitute(
+                "  int dz_$0 = select(0, src_size.x * src_size.y, (in_x$1 && "
+                "in_y$2));\n",
+                y * block_size.x + x, x, y);
+          } else {
+            c += absl::Substitute("  int addr_$0 = cy$2 * src_size.x + cx$1;\n",
+                                  y * block_size.x + x, x, y);
+          }
         }
       }
+      if (src_tensor_type == TensorStorageType::BUFFER) {
+        c += "  int dz = src_size.x * src_size.y;\n";
+      }
     }
-  } else if (is_image_buffer) {
+  } else if (is_buffer) {
     for (int y = 0; y < block_size.y; ++y) {
       c += "  bool in_y" + ys[y] + " = yc" + ys[y] + " >= 0 && yc" + ys[y] +
            " < src_size.y;\n";
@@ -180,23 +198,42 @@
     for (int x = 0; x < block_size.x; ++x) {
       for (int y = 0; y < block_size.y; ++y) {
         const std::string id = std::to_string(y * block_size.x + x);
-        const std::string inside = std::to_string(y * block_size.x + x);
-        c += absl::Substitute(
-            "  int addr_$0 = select(-1, yc$2 * src_size.x + xc$1, (in_x$1 && "
-            "in_y$2));\n",
-            y * block_size.x + x, x, y);
-        c += absl::Substitute(
-            "  int dz_$0 = select(0, src_size.x * src_size.y, (in_x$1 && "
-            "in_y$2));\n",
-            y * block_size.x + x, x, y);
+        if (src_tensor_type == TensorStorageType::IMAGE_BUFFER) {
+          c += absl::Substitute(
+              "  int addr_$0 = select(-1, yc$2 * src_size.x + xc$1, (in_x$1 && "
+              "in_y$2));\n",
+              y * block_size.x + x, x, y);
+          c += absl::Substitute(
+              "  int dz_$0 = select(0, src_size.x * src_size.y, (in_x$1 && "
+              "in_y$2));\n",
+              y * block_size.x + x, x, y);
+        } else {
+          c += absl::Substitute("  int addr_$0 = yc$2 * src_size.x + xc$1;\n",
+                                y * block_size.x + x, x, y);
+        }
       }
     }
+    if (src_tensor_type == TensorStorageType::BUFFER) {
+      c += "  int dz = src_size.x * src_size.y;\n";
+    }
   }
   c += "  for (int s = 0; s < src_size.z; ++s) {\n";
-  if (is_image_buffer) {
-    for (int index = 0; index < block_size.x * block_size.y; ++index) {
-      const std::string id = std::to_string(index);
-      c += "    FLT4 src" + id + " = " + src_tensor.Read("addr_" + id) + ";\n";
+  if (is_buffer) {
+    if (src_tensor_type == TensorStorageType::IMAGE_BUFFER) {
+      for (int index = 0; index < block_size.x * block_size.y; ++index) {
+        const std::string id = std::to_string(index);
+        c +=
+            "    FLT4 src" + id + " = " + src_tensor.Read("addr_" + id) + ";\n";
+      }
+    } else {
+      for (int x = 0; x < block_size.x; ++x) {
+        for (int y = 0; y < block_size.y; ++y) {
+          const std::string id = std::to_string(y * block_size.x + x);
+          c += "    FLT4 src" + id + " = " + src_tensor.Read("addr_" + id) +
+               " * (FLT)(in_x" + xs[x] + " && in_y" + ys[y] + "); addr_" + id +
+               " += dz;\n";
+        }
+      }
     }
   }
   for (int z = 0; z < block_size.z; ++z) {
@@ -208,7 +245,7 @@
 )",
                           fc, z * 4 + 0, z * 4 + 1, z * 4 + 2, z * 4 + 3);
   }
-  if (!is_image_buffer) {
+  if (!is_buffer) {
     const auto mode = GetFastestZeroMode(device);
     for (int x = 0; x < block_size.x; ++x) {
       for (int y = 0; y < block_size.y; ++y) {
@@ -228,10 +265,12 @@
   if (!is1x1) {
     c += "    filter_offset++;\n";
   }
-  if (is_image_buffer) {
-    for (int index = 0; index < block_size.x * block_size.y; ++index) {
-      const std::string id = std::to_string(index);
-      c += "     addr_" + id + " += dz_" + id + ";\n";
+  if (is_buffer) {
+    if (src_tensor_type == TensorStorageType::IMAGE_BUFFER) {
+      for (int index = 0; index < block_size.x * block_size.y; ++index) {
+        const std::string id = std::to_string(index);
+        c += "     addr_" + id + " += dz_" + id + ";\n";
+      }
     }
   }
   c += "  }\n";  // src_size.z

diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture_test.cc
index 82d2f1b..e38d82f 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture_test.cc

@@ -46,7 +46,7 @@
   attr.bias.shape = Linear(1);
   attr.bias.data = {0.0f};
 
-  for (auto storage : env_.GetSupportedTextureStorages()) {
+  for (auto storage : env_.GetSupportedStorages()) {
     for (auto precision : env_.GetSupportedPrecisions()) {
       const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-3f;
       OperationDef op_def;
@@ -81,7 +81,7 @@
   attr.bias.shape = Linear(2);
   attr.bias.data = {0.5f, -0.5f};
 
-  for (auto storage : env_.GetSupportedTextureStorages()) {
+  for (auto storage : env_.GetSupportedStorages()) {
     for (auto precision : env_.GetSupportedPrecisions()) {
       const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-3f;
       OperationDef op_def;

diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/depth_wise_conv_3x3_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/depth_wise_conv_3x3_test.cc
index 4c52b23..5f1c864 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/depth_wise_conv_3x3_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/depth_wise_conv_3x3_test.cc

@@ -47,7 +47,7 @@
   attr.bias.shape = Linear(2);
   attr.bias.data = {0.0f, 0.0f};
 
-  for (auto storage : env_.GetSupportedTextureStorages()) {
+  for (auto storage : env_.GetSupportedStorages()) {
     for (auto precision : env_.GetSupportedPrecisions()) {
       const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-3f;
       OperationDef op_def;

diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/convolution_selector.cc b/tensorflow/lite/delegates/gpu/cl/selectors/convolution_selector.cc
index 2cda7bc..b570c10 100644
--- a/tensorflow/lite/delegates/gpu/cl/selectors/convolution_selector.cc
+++ b/tensorflow/lite/delegates/gpu/cl/selectors/convolution_selector.cc

@@ -39,18 +39,10 @@
     ConvConstants conv;
     RETURN_IF_ERROR(CreateConvConstants(creation_context, op_def, attr, &conv));
     *ptr = absl::make_unique<ConvConstants>(std::move(conv));
-  } else if (op_def.src_tensors[0].storage_type != TensorStorageType::BUFFER) {
+  } else {
     ConvTexture conv;
     RETURN_IF_ERROR(CreateConvTexture(creation_context, op_def, attr, &conv));
     *ptr = absl::make_unique<ConvTexture>(std::move(conv));
-  } else if (IsConvBuffer1x1Supported(op_def, attr)) {
-    ConvBuffer1x1 conv;
-    RETURN_IF_ERROR(CreateConvBuffer1x1(creation_context, op_def, attr, &conv));
-    *ptr = absl::make_unique<ConvBuffer1x1>(std::move(conv));
-  } else {
-    ConvBuffer conv;
-    RETURN_IF_ERROR(CreateConvBuffer(creation_context, op_def, attr, &conv));
-    *ptr = absl::make_unique<ConvBuffer>(std::move(conv));
   }
 
   return OkStatus();
commit	f7ed788a09a17ce1247c6a3d983c08880c932aa9	[log] [tgz]
author	A. Unique TensorFlower <gardener@tensorflow.org>	Mon Oct 07 16:36:50 2019 -0700
committer	TensorFlower Gardener <gardener@tensorflow.org>	Mon Oct 07 21:03:24 2019 -0700
tree	5df8ef05bb0c3b31930d9ee0ab7cd457fadd6847
parent	2b060e1a0d44a2d29ca564375ba3a28c5d312852 [diff]