Clarified GetStorageTypeWithMinimalMemoryConsumption.
PiperOrigin-RevId: 426189297
Change-Id: I0d98f0ff560cb1516964c2141bdf4727c8579e85
diff --git a/tensorflow/lite/delegates/gpu/cl/environment.cc b/tensorflow/lite/delegates/gpu/cl/environment.cc
index 0fa8857..adafa06 100644
--- a/tensorflow/lite/delegates/gpu/cl/environment.cc
+++ b/tensorflow/lite/delegates/gpu/cl/environment.cc
@@ -228,12 +228,18 @@
}
}
} else if (gpu_info.IsPowerVR()) {
- return TensorStorageType::BUFFER;
+ if (gpu_info.opencl_info.IsImage2dFromBufferSupported() &&
+ CanUseSubBufferForImage2d(gpu_info)) {
+ return TensorStorageType::TEXTURE_2D;
+ } else {
+ return TensorStorageType::BUFFER;
+ }
} else if (gpu_info.IsMali()) {
const MaliInfo mali_info = gpu_info.mali_info;
if (mali_info.IsMaliT8xx() || mali_info.IsBifrostGen3() ||
mali_info.IsValhall()) {
- if (gpu_info.opencl_info.IsImage2dFromBufferSupported()) {
+ if (gpu_info.opencl_info.IsImage2dFromBufferSupported() &&
+ CanUseSubBufferForImage2d(gpu_info)) {
return TensorStorageType::TEXTURE_2D;
} else {
return TensorStorageType::BUFFER;
@@ -253,6 +259,23 @@
return TensorStorageType::BUFFER;
}
+bool CanUseSubBufferForImage2d(const GpuInfo& gpu_info) {
+ if (!gpu_info.IsCL11OrHigher()) {  // presumably: sub-buffers need OpenCL 1.1+
+ return false;
+ }
+ if (gpu_info.IsPowerVR()) {
+ // Known driver issue: treated as unsupported on all PowerVR devices.
+ return false;
+ }
+ if (gpu_info.IsMali() &&
+ (gpu_info.mali_info.IsBifrost() || gpu_info.mali_info.IsMidgard())) {
+ // Known driver issue on some G72 (Bifrost), G76 (Bifrost), T830 (Midgard),
+ // and T880 (Midgard) devices.
+ return false;
+ }
+ return true;
+}
+
absl::Status CreateEnvironment(Environment* result) {
CLDevice gpu;
RETURN_IF_ERROR(CreateDefaultGPUDevice(&gpu));
diff --git a/tensorflow/lite/delegates/gpu/cl/environment.h b/tensorflow/lite/delegates/gpu/cl/environment.h
index 5138e59..a638214 100644
--- a/tensorflow/lite/delegates/gpu/cl/environment.h
+++ b/tensorflow/lite/delegates/gpu/cl/environment.h
@@ -79,6 +79,9 @@
TensorStorageType GetStorageTypeWithMinimalMemoryConsumption(
const GpuInfo& gpu_info);
+// Checks if image 2D creation from sub-buffer is supported.
+bool CanUseSubBufferForImage2d(const GpuInfo& gpu_info);
+
absl::Status CreateEnvironment(Environment* result);
} // namespace cl
diff --git a/tensorflow/lite/delegates/gpu/cl/inference_context.cc b/tensorflow/lite/delegates/gpu/cl/inference_context.cc
index 8420bfc..b08a4c7 100644
--- a/tensorflow/lite/delegates/gpu/cl/inference_context.cc
+++ b/tensorflow/lite/delegates/gpu/cl/inference_context.cc
@@ -96,23 +96,6 @@
assignment.object_sizes.end(), static_cast<size_t>(0));
}
-// Checks if sub-buffer image 2D mapping is supported.
-bool CanUseSubBuffer(const GpuInfo& gpu_info) {
- if (!gpu_info.IsCL11OrHigher()) {
- return false;
- }
- if (gpu_info.IsPowerVR()) {
- return false;
- }
- if (gpu_info.IsMali() &&
- (gpu_info.mali_info.IsBifrost() || gpu_info.mali_info.IsMidgard())) {
- // Known driver issue on some G72 (Bifrost), G76 (Bifrost), T830 (Midgard),
- // and T880 (Midgard) devices.
- return false;
- }
- return true;
-}
-
} // namespace
void InferenceContext::ExecutionHints::Init(const GpuInfo& gpu_info) {
@@ -392,6 +375,7 @@
&buffer_usages);
std::vector<TensorUsageRecord<size_t>> buffer_usage_records;
+ bool has_buffer_based_images = false;
for (auto& usage : buffer_usages) {
const auto& t = tensors_descs_[usage.first];
const auto& shape = t.shape;
@@ -400,6 +384,7 @@
size_t buffer_size;
if (descriptor.storage_type == TensorStorageType::TEXTURE_2D ||
descriptor.storage_type == TensorStorageType::SINGLE_TEXTURE_2D) {
+ has_buffer_based_images = true;
const size_t bytes_per_pixel =
element_size *
(descriptor.storage_type == TensorStorageType::TEXTURE_2D ? 4
@@ -413,6 +398,9 @@
const size_t width_aligned = AlignByN(width, width_pixel_alignment);
buffer_size = width_aligned * bytes_per_pixel * height;
} else {
+ if (descriptor.storage_type == TensorStorageType::IMAGE_BUFFER) {
+ has_buffer_based_images = true;
+ }
buffer_size =
shape.b * shape.w * shape.h * AlignByN(shape.c, 4) * element_size;
}
@@ -427,12 +415,13 @@
RETURN_IF_ERROR(AssignObjectsToTensors(
buffer_usage_records, MemoryStrategy::GREEDY_BEST, &buffer_assignment));
- size_t base_align_bytes =
- std::max<size_t>(gpu_info.opencl_info.base_addr_align_in_bits >> 3, 1);
bool use_offset_assignment = false;
OffsetsAssignment offset_assignment;
- if (CanUseSubBuffer(gpu_info)) {
+ if ((!has_buffer_based_images && gpu_info.IsCL11OrHigher()) ||
+ CanUseSubBufferForImage2d(gpu_info)) {
+ const size_t base_align_bytes =
+ std::max<size_t>(gpu_info.opencl_info.base_addr_align_in_bits >> 3, 1);
RETURN_IF_ERROR(AssignOffsetsToTensors(
buffer_usage_records, MemoryStrategy::GREEDY_BY_SIZE,
&offset_assignment, base_align_bytes));