Using ConvTexture for Buffer intermediate tensors.
PiperOrigin-RevId: 273404675
diff --git a/tensorflow/lite/delegates/gpu/cl/environment.cc b/tensorflow/lite/delegates/gpu/cl/environment.cc
index 7677e06..01e3912 100644
--- a/tensorflow/lite/delegates/gpu/cl/environment.cc
+++ b/tensorflow/lite/delegates/gpu/cl/environment.cc
@@ -169,19 +169,6 @@
}
}
-std::vector<TensorStorageType> Environment::GetSupportedTextureStorages()
- const {
- std::vector<TensorStorageType> storage_types = {
- TensorStorageType::TEXTURE_2D};
- if (device_.SupportsTextureArray()) {
- storage_types.push_back(TensorStorageType::TEXTURE_ARRAY);
- }
- if (device_.IsAdreno() && device_.SupportsImageBuffer()) {
- storage_types.push_back(TensorStorageType::IMAGE_BUFFER);
- }
- return storage_types;
-}
-
std::vector<TensorStorageType> Environment::GetSupportedStorages() const {
std::vector<TensorStorageType> storage_types = {TensorStorageType::TEXTURE_2D,
TensorStorageType::BUFFER};
diff --git a/tensorflow/lite/delegates/gpu/cl/environment.h b/tensorflow/lite/delegates/gpu/cl/environment.h
index 80df6a9..8492953 100644
--- a/tensorflow/lite/delegates/gpu/cl/environment.h
+++ b/tensorflow/lite/delegates/gpu/cl/environment.h
@@ -55,7 +55,6 @@
std::vector<CalculationsPrecision> GetSupportedPrecisions() const;
bool IsSupported(CalculationsPrecision precision) const;
- std::vector<TensorStorageType> GetSupportedTextureStorages() const;
std::vector<TensorStorageType> GetSupportedStorages() const;
void SetHighPerformance() const;
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.cc
index b9c4fe8..e4d5987 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture.cc
@@ -42,8 +42,9 @@
{"dst_size.x", "dst_size.y", "dst_size.z"},
op_def.dst_tensors[0]);
- const bool is_image_buffer =
- op_def.src_tensors[0].storage_type == TensorStorageType::IMAGE_BUFFER;
+ const auto src_tensor_type = op_def.src_tensors[0].storage_type;
+ const bool is_buffer = src_tensor_type == TensorStorageType::IMAGE_BUFFER ||
+ src_tensor_type == TensorStorageType::BUFFER;
std::vector<std::string> xs(block_size.x);
for (int x = 0; x < block_size.x; ++x) {
@@ -139,36 +140,53 @@
for (int y = 0; y < block_size.y; ++y) {
c += " cy" + ys[y] + " = y * dilation.y + yc" + ys[y] + ";\n";
}
- if (is_image_buffer) {
+ if (is_buffer) {
for (int y = 0; y < block_size.y; ++y) {
c += " bool in_y" + ys[y] + " = cy" + ys[y] + " >= 0 && cy" + ys[y] +
" < src_size.y;\n";
+ if (src_tensor_type == TensorStorageType::BUFFER) {
+ c += " cy" + ys[y] + " = clamp(cy" + ys[y] +
+ ", 0, src_size.y - 1);\n";
+ }
}
}
c += " for (int x = 0; x < kernel_size.x; ++x) {\n";
for (int x = 0; x < block_size.x; ++x) {
c += " cx" + xs[x] + " = x * dilation.x + xc" + xs[x] + ";\n";
}
- if (is_image_buffer) {
+ if (is_buffer) {
for (int x = 0; x < block_size.x; ++x) {
c += " bool in_x" + xs[x] + " = cx" + xs[x] + " >= 0 && cx" + xs[x] +
" < src_size.x;\n";
+ if (src_tensor_type == TensorStorageType::BUFFER) {
+ c += " cx" + xs[x] + " = clamp(cx" + xs[x] +
+ ", 0, src_size.x - 1);\n";
+ }
}
for (int x = 0; x < block_size.x; ++x) {
for (int y = 0; y < block_size.y; ++y) {
const std::string id = std::to_string(y * block_size.x + x);
- c += absl::Substitute(
- " int addr_$0 = select(-1, cy$2 * src_size.x + cx$1, (in_x$1 && "
- "in_y$2));\n",
- y * block_size.x + x, x, y);
- c += absl::Substitute(
- " int dz_$0 = select(0, src_size.x * src_size.y, (in_x$1 && "
- "in_y$2));\n",
- y * block_size.x + x, x, y);
+ if (src_tensor_type == TensorStorageType::IMAGE_BUFFER) {
+ c += absl::Substitute(
+ " int addr_$0 = select(-1, cy$2 * src_size.x + cx$1, (in_x$1 "
+ "&& "
+ "in_y$2));\n",
+ y * block_size.x + x, x, y);
+ c += absl::Substitute(
+ " int dz_$0 = select(0, src_size.x * src_size.y, (in_x$1 && "
+ "in_y$2));\n",
+ y * block_size.x + x, x, y);
+ } else {
+ c += absl::Substitute(" int addr_$0 = cy$2 * src_size.x + cx$1;\n",
+ y * block_size.x + x, x, y);
+ }
}
}
+ if (src_tensor_type == TensorStorageType::BUFFER) {
+ c += " int dz = src_size.x * src_size.y;\n";
+ }
}
- } else if (is_image_buffer) {
+ } else if (is_buffer) {
for (int y = 0; y < block_size.y; ++y) {
c += " bool in_y" + ys[y] + " = yc" + ys[y] + " >= 0 && yc" + ys[y] +
" < src_size.y;\n";
@@ -180,23 +198,42 @@
for (int x = 0; x < block_size.x; ++x) {
for (int y = 0; y < block_size.y; ++y) {
const std::string id = std::to_string(y * block_size.x + x);
- const std::string inside = std::to_string(y * block_size.x + x);
- c += absl::Substitute(
- " int addr_$0 = select(-1, yc$2 * src_size.x + xc$1, (in_x$1 && "
- "in_y$2));\n",
- y * block_size.x + x, x, y);
- c += absl::Substitute(
- " int dz_$0 = select(0, src_size.x * src_size.y, (in_x$1 && "
- "in_y$2));\n",
- y * block_size.x + x, x, y);
+ if (src_tensor_type == TensorStorageType::IMAGE_BUFFER) {
+ c += absl::Substitute(
+ " int addr_$0 = select(-1, yc$2 * src_size.x + xc$1, (in_x$1 && "
+ "in_y$2));\n",
+ y * block_size.x + x, x, y);
+ c += absl::Substitute(
+ " int dz_$0 = select(0, src_size.x * src_size.y, (in_x$1 && "
+ "in_y$2));\n",
+ y * block_size.x + x, x, y);
+ } else {
+ c += absl::Substitute(" int addr_$0 = yc$2 * src_size.x + xc$1;\n",
+ y * block_size.x + x, x, y);
+ }
}
}
+ if (src_tensor_type == TensorStorageType::BUFFER) {
+ c += " int dz = src_size.x * src_size.y;\n";
+ }
}
c += " for (int s = 0; s < src_size.z; ++s) {\n";
- if (is_image_buffer) {
- for (int index = 0; index < block_size.x * block_size.y; ++index) {
- const std::string id = std::to_string(index);
- c += " FLT4 src" + id + " = " + src_tensor.Read("addr_" + id) + ";\n";
+ if (is_buffer) {
+ if (src_tensor_type == TensorStorageType::IMAGE_BUFFER) {
+ for (int index = 0; index < block_size.x * block_size.y; ++index) {
+ const std::string id = std::to_string(index);
+ c +=
+ " FLT4 src" + id + " = " + src_tensor.Read("addr_" + id) + ";\n";
+ }
+ } else {
+ for (int x = 0; x < block_size.x; ++x) {
+ for (int y = 0; y < block_size.y; ++y) {
+ const std::string id = std::to_string(y * block_size.x + x);
+ c += " FLT4 src" + id + " = " + src_tensor.Read("addr_" + id) +
+ " * (FLT)(in_x" + xs[x] + " && in_y" + ys[y] + "); addr_" + id +
+ " += dz;\n";
+ }
+ }
}
}
for (int z = 0; z < block_size.z; ++z) {
@@ -208,7 +245,7 @@
)",
fc, z * 4 + 0, z * 4 + 1, z * 4 + 2, z * 4 + 3);
}
- if (!is_image_buffer) {
+ if (!is_buffer) {
const auto mode = GetFastestZeroMode(device);
for (int x = 0; x < block_size.x; ++x) {
for (int y = 0; y < block_size.y; ++y) {
@@ -228,10 +265,12 @@
if (!is1x1) {
c += " filter_offset++;\n";
}
- if (is_image_buffer) {
- for (int index = 0; index < block_size.x * block_size.y; ++index) {
- const std::string id = std::to_string(index);
- c += " addr_" + id + " += dz_" + id + ";\n";
+ if (is_buffer) {
+ if (src_tensor_type == TensorStorageType::IMAGE_BUFFER) {
+ for (int index = 0; index < block_size.x * block_size.y; ++index) {
+ const std::string id = std::to_string(index);
+ c += " addr_" + id + " += dz_" + id + ";\n";
+ }
}
}
c += " }\n"; // src_size.z
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture_test.cc
index 82d2f1b..e38d82f 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/conv_texture_test.cc
@@ -46,7 +46,7 @@
attr.bias.shape = Linear(1);
attr.bias.data = {0.0f};
- for (auto storage : env_.GetSupportedTextureStorages()) {
+ for (auto storage : env_.GetSupportedStorages()) {
for (auto precision : env_.GetSupportedPrecisions()) {
const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-3f;
OperationDef op_def;
@@ -81,7 +81,7 @@
attr.bias.shape = Linear(2);
attr.bias.data = {0.5f, -0.5f};
- for (auto storage : env_.GetSupportedTextureStorages()) {
+ for (auto storage : env_.GetSupportedStorages()) {
for (auto precision : env_.GetSupportedPrecisions()) {
const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-3f;
OperationDef op_def;
diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/depth_wise_conv_3x3_test.cc b/tensorflow/lite/delegates/gpu/cl/kernels/depth_wise_conv_3x3_test.cc
index 4c52b23..5f1c864 100644
--- a/tensorflow/lite/delegates/gpu/cl/kernels/depth_wise_conv_3x3_test.cc
+++ b/tensorflow/lite/delegates/gpu/cl/kernels/depth_wise_conv_3x3_test.cc
@@ -47,7 +47,7 @@
attr.bias.shape = Linear(2);
attr.bias.data = {0.0f, 0.0f};
- for (auto storage : env_.GetSupportedTextureStorages()) {
+ for (auto storage : env_.GetSupportedStorages()) {
for (auto precision : env_.GetSupportedPrecisions()) {
const float eps = precision == CalculationsPrecision::F32 ? 1e-6f : 1e-3f;
OperationDef op_def;
diff --git a/tensorflow/lite/delegates/gpu/cl/selectors/convolution_selector.cc b/tensorflow/lite/delegates/gpu/cl/selectors/convolution_selector.cc
index 2cda7bc..b570c10 100644
--- a/tensorflow/lite/delegates/gpu/cl/selectors/convolution_selector.cc
+++ b/tensorflow/lite/delegates/gpu/cl/selectors/convolution_selector.cc
@@ -39,18 +39,10 @@
ConvConstants conv;
RETURN_IF_ERROR(CreateConvConstants(creation_context, op_def, attr, &conv));
*ptr = absl::make_unique<ConvConstants>(std::move(conv));
- } else if (op_def.src_tensors[0].storage_type != TensorStorageType::BUFFER) {
+ } else {
ConvTexture conv;
RETURN_IF_ERROR(CreateConvTexture(creation_context, op_def, attr, &conv));
*ptr = absl::make_unique<ConvTexture>(std::move(conv));
- } else if (IsConvBuffer1x1Supported(op_def, attr)) {
- ConvBuffer1x1 conv;
- RETURN_IF_ERROR(CreateConvBuffer1x1(creation_context, op_def, attr, &conv));
- *ptr = absl::make_unique<ConvBuffer1x1>(std::move(conv));
- } else {
- ConvBuffer conv;
- RETURN_IF_ERROR(CreateConvBuffer(creation_context, op_def, attr, &conv));
- *ptr = absl::make_unique<ConvBuffer>(std::move(conv));
}
return OkStatus();