Clarified GetStorageTypeWithMinimalMemoryConsumption.

PiperOrigin-RevId: 426189297
Change-Id: I0d98f0ff560cb1516964c2141bdf4727c8579e85
diff --git a/tensorflow/lite/delegates/gpu/cl/environment.cc b/tensorflow/lite/delegates/gpu/cl/environment.cc
index 0fa8857..adafa06 100644
--- a/tensorflow/lite/delegates/gpu/cl/environment.cc
+++ b/tensorflow/lite/delegates/gpu/cl/environment.cc
@@ -228,12 +228,18 @@
       }
     }
   } else if (gpu_info.IsPowerVR()) {
-    return TensorStorageType::BUFFER;
+    if (gpu_info.opencl_info.IsImage2dFromBufferSupported() &&
+        CanUseSubBufferForImage2d(gpu_info)) {
+      return TensorStorageType::TEXTURE_2D;
+    } else {
+      return TensorStorageType::BUFFER;
+    }
   } else if (gpu_info.IsMali()) {
     const MaliInfo mali_info = gpu_info.mali_info;
     if (mali_info.IsMaliT8xx() || mali_info.IsBifrostGen3() ||
         mali_info.IsValhall()) {
-      if (gpu_info.opencl_info.IsImage2dFromBufferSupported()) {
+      if (gpu_info.opencl_info.IsImage2dFromBufferSupported() &&
+          CanUseSubBufferForImage2d(gpu_info)) {
         return TensorStorageType::TEXTURE_2D;
       } else {
         return TensorStorageType::BUFFER;
@@ -253,6 +259,23 @@
   return TensorStorageType::BUFFER;
 }
 
+bool CanUseSubBufferForImage2d(const GpuInfo& gpu_info) {
+  if (!gpu_info.IsCL11OrHigher()) {
+    return false;
+  }
+  if (gpu_info.IsPowerVR()) {
+    // Driver issue: report sub-buffer image 2D as unsupported on PowerVR.
+    return false;
+  }
+  if (gpu_info.IsMali() &&
+      (gpu_info.mali_info.IsBifrost() || gpu_info.mali_info.IsMidgard())) {
+    // Known driver issue on some G72 (Bifrost), G76 (Bifrost), T830 (Midgard),
+    // and T880 (Midgard) devices.
+    return false;
+  }
+  return true;
+}
+
 absl::Status CreateEnvironment(Environment* result) {
   CLDevice gpu;
   RETURN_IF_ERROR(CreateDefaultGPUDevice(&gpu));
diff --git a/tensorflow/lite/delegates/gpu/cl/environment.h b/tensorflow/lite/delegates/gpu/cl/environment.h
index 5138e59..a638214 100644
--- a/tensorflow/lite/delegates/gpu/cl/environment.h
+++ b/tensorflow/lite/delegates/gpu/cl/environment.h
@@ -79,6 +79,9 @@
 TensorStorageType GetStorageTypeWithMinimalMemoryConsumption(
     const GpuInfo& gpu_info);
 
+// Checks if image 2D creation from sub-buffer is supported.
+bool CanUseSubBufferForImage2d(const GpuInfo& gpu_info);
+
 absl::Status CreateEnvironment(Environment* result);
 
 }  // namespace cl
diff --git a/tensorflow/lite/delegates/gpu/cl/inference_context.cc b/tensorflow/lite/delegates/gpu/cl/inference_context.cc
index 8420bfc..b08a4c7 100644
--- a/tensorflow/lite/delegates/gpu/cl/inference_context.cc
+++ b/tensorflow/lite/delegates/gpu/cl/inference_context.cc
@@ -96,23 +96,6 @@
                          assignment.object_sizes.end(), static_cast<size_t>(0));
 }
 
-// Checks if sub-buffer image 2D mapping is supported.
-bool CanUseSubBuffer(const GpuInfo& gpu_info) {
-  if (!gpu_info.IsCL11OrHigher()) {
-    return false;
-  }
-  if (gpu_info.IsPowerVR()) {
-    return false;
-  }
-  if (gpu_info.IsMali() &&
-      (gpu_info.mali_info.IsBifrost() || gpu_info.mali_info.IsMidgard())) {
-    // Known driver issue on some G72 (Bifrost), G76 (Bifrost), T830 (Midgard),
-    // and T880 (Midgard) devices.
-    return false;
-  }
-  return true;
-}
-
 }  // namespace
 
 void InferenceContext::ExecutionHints::Init(const GpuInfo& gpu_info) {
@@ -392,6 +375,7 @@
       &buffer_usages);
 
   std::vector<TensorUsageRecord<size_t>> buffer_usage_records;
+  bool has_buffer_based_images = false;
   for (auto& usage : buffer_usages) {
     const auto& t = tensors_descs_[usage.first];
     const auto& shape = t.shape;
@@ -400,6 +384,7 @@
     size_t buffer_size;
     if (descriptor.storage_type == TensorStorageType::TEXTURE_2D ||
         descriptor.storage_type == TensorStorageType::SINGLE_TEXTURE_2D) {
+      has_buffer_based_images = true;
       const size_t bytes_per_pixel =
           element_size *
           (descriptor.storage_type == TensorStorageType::TEXTURE_2D ? 4
@@ -413,6 +398,9 @@
       const size_t width_aligned = AlignByN(width, width_pixel_alignment);
       buffer_size = width_aligned * bytes_per_pixel * height;
     } else {
+      if (descriptor.storage_type == TensorStorageType::IMAGE_BUFFER) {
+        has_buffer_based_images = true;
+      }
       buffer_size =
           shape.b * shape.w * shape.h * AlignByN(shape.c, 4) * element_size;
     }
@@ -427,12 +415,13 @@
   RETURN_IF_ERROR(AssignObjectsToTensors(
       buffer_usage_records, MemoryStrategy::GREEDY_BEST, &buffer_assignment));
 
-  size_t base_align_bytes =
-      std::max<size_t>(gpu_info.opencl_info.base_addr_align_in_bits >> 3, 1);
   bool use_offset_assignment = false;
 
   OffsetsAssignment offset_assignment;
-  if (CanUseSubBuffer(gpu_info)) {
+  if ((!has_buffer_based_images && gpu_info.IsCL11OrHigher()) ||
+      CanUseSubBufferForImage2d(gpu_info)) {
+    const size_t base_align_bytes =
+        std::max<size_t>(gpu_info.opencl_info.base_addr_align_in_bits >> 3, 1);
     RETURN_IF_ERROR(AssignOffsetsToTensors(
         buffer_usage_records, MemoryStrategy::GREEDY_BY_SIZE,
         &offset_assignment, base_align_bytes));