ConvPowerVR modified to support kernel languages without pointers.

PiperOrigin-RevId: 383894510
Change-Id: Ib6ff926e5caef7237dbea4e3a274fc480705b5c2

commit: 042b8c59ff6ef6bf75c3d5197f5b53622f0f9659 [log] [tgz]
author: Raman Sarokin <sorokin@google.com> Fri Jul 09 12:04:59 2021 -0700
committer: TensorFlower Gardener <gardener@tensorflow.org> Fri Jul 09 12:14:17 2021 -0700
tree: d064706cead9194a0a3dad4f86ab03425dcb4554
parent: 3ef67e3337328209b5d46736a7624366ff2a748a [diff]
diff --git a/tensorflow/lite/delegates/gpu/common/gpu_info.cc b/tensorflow/lite/delegates/gpu/common/gpu_info.cc
index e7cf0dd..860043c 100644
--- a/tensorflow/lite/delegates/gpu/common/gpu_info.cc
+++ b/tensorflow/lite/delegates/gpu/common/gpu_info.cc

@@ -484,6 +484,10 @@
   return true;
 }
 
+bool GpuInfo::SupportsPointersInKernels() const {
+  return IsApiOpenCl() || IsApiMetal();
+}
+
 bool GpuInfo::IsWaveSizeEqualTo32() const {
   return supported_subgroup_sizes.size() == 1 &&
          supported_subgroup_sizes[0] == 32;

diff --git a/tensorflow/lite/delegates/gpu/common/gpu_info.h b/tensorflow/lite/delegates/gpu/common/gpu_info.h
index 1387614..cedd5a2 100644
--- a/tensorflow/lite/delegates/gpu/common/gpu_info.h
+++ b/tensorflow/lite/delegates/gpu/common/gpu_info.h

@@ -361,6 +361,8 @@
   bool SupportsImageBuffer() const;
   bool SupportsImage3D() const;
 
+  bool SupportsPointersInKernels() const;
+
   // returns true if device have fixed wave size equal to 32
   bool IsWaveSizeEqualTo32() const;
   bool SupportsSubGroupWithSize(int sub_group_size) const;

diff --git a/tensorflow/lite/delegates/gpu/common/tasks/conv_powervr.cc b/tensorflow/lite/delegates/gpu/common/tasks/conv_powervr.cc
index 7aef649..3454b8f 100644
--- a/tensorflow/lite/delegates/gpu/common/tasks/conv_powervr.cc
+++ b/tensorflow/lite/delegates/gpu/common/tasks/conv_powervr.cc

@@ -613,17 +613,18 @@
   if (need_local_mem) {
     c += "  __local " + weights_data_type + " weights_cache[" +
          std::to_string(local_mem_size) + "];\n";
-  } else if (conv_params.AreWeightsBuffer()) {
+  } else if (conv_params.AreWeightsBuffer() &&
+             gpu_info.SupportsPointersInKernels()) {
     c += "    " + weights_global_ptr + " weights_cache;\n";
   } else if (!trivial_kernel_size) {
     c += "  int filter_offset = 0;\n";
   }
   if (conv_params.AreWeightsBuffer()) {
+    std::string offset;
     if (conv_params.different_weights_for_height) {
-      c += "  " + weights_global_ptr +
-           " filters_loc = args.weights.GetPtr() + (DST_S * "
-           "args.src_tensor.Height() + DST_Y * " +
-           std::to_string(block_size.w) + ") * 4 * args.src_tensor.Slices();\n";
+      offset = "(DST_S * args.src_tensor.Height() + DST_Y * " +
+               std::to_string(block_size.w) +
+               ") * 4 * args.src_tensor.Slices()";
     } else {
       std::string kernel_spatial_offset = "";
       if (!conv_params_.x_kernel_is_1) {
@@ -635,10 +636,13 @@
       if (src_def.HasAxis(Axis::DEPTH) && !conv_params_.z_kernel_is_1) {
         kernel_spatial_offset += " * args.kernel_size_z";
       }
+      offset = "DST_S * 4 * args.src_tensor.Slices()" + kernel_spatial_offset;
+    }
+    if (gpu_info.SupportsPointersInKernels()) {
       c += "  " + weights_global_ptr +
-           " filters_loc = args.weights.GetPtr() + DST_S * 4 * "
-           "args.src_tensor.Slices()" +
-           kernel_spatial_offset + ";\n";
+           " filters_loc = args.weights.GetPtr() + " + offset + ";\n";
+    } else {
+      c += "  int filters_offset = " + offset + ";\n";
     }
   }
   if (src_def.HasAxis(Axis::DEPTH) && !conv_params_.z_kernel_is_1) {
@@ -848,7 +852,12 @@
                       std::to_string(s * 4 + ch + shared_offset);
                   std::string w_val;
                   if (conv_params.AreWeightsBuffer()) {
-                    w_val = "weights_cache[" + weight_id + "]";
+                    if (gpu_info.SupportsPointersInKernels()) {
+                      w_val = "weights_cache[" + weight_id + "]";
+                    } else {
+                      w_val = "args.weights.Read(filters_offset + " +
+                              weight_id + ")";
+                    }
                   } else {
                     w_val = "f" + weight_id;
                   }
@@ -878,7 +887,12 @@
                 std::string weight_id =
                     std::to_string(s * 4 + i + shared_offset);
                 if (conv_params.AreWeightsBuffer()) {
-                  F[i] = "weights_cache[" + weight_id + "]";
+                  if (gpu_info.SupportsPointersInKernels()) {
+                    F[i] = "weights_cache[" + weight_id + "]";
+                  } else {
+                    F[i] =
+                        "args.weights.Read(filters_offset + " + weight_id + ")";
+                  }
                 } else {
                   F[i] = "f" + weight_id;
                 }
@@ -931,7 +945,9 @@
       c += "    }\n";
     }
   } else if (conv_params.AreWeightsBuffer()) {  // GLOBAL_MEM/CONSTANT_MEM
-    c += "    weights_cache = filters_loc;\n";
+    if (gpu_info.SupportsPointersInKernels()) {
+      c += "    weights_cache = filters_loc;\n";
+    }
   } else {  // TEXTURES_MEM
     for (int dst_s = 0; dst_s < block_size.w; ++dst_s) {
       std::string f_y = trivial_kernel_size ? "s" : "filter_offset";
@@ -964,7 +980,11 @@
     c += "    s += 1;\n";
   }
   if (conv_params.AreWeightsBuffer()) {
-    c += "    filters_loc += " + std::to_string(local_mem_size) + ";\n";
+    if (gpu_info.SupportsPointersInKernels()) {
+      c += "    filters_loc += " + std::to_string(local_mem_size) + ";\n";
+    } else {
+      c += "    filters_offset += " + std::to_string(local_mem_size) + ";\n";
+    }
   }
   c += "  } while (s < args.src_tensor.Slices());\n";
   if (!conv_params.x_kernel_is_1) {
@@ -988,7 +1008,7 @@
                                    "DST_S", "lid", total_work_items,
                                    block_size.w);
       c += "  " + barrier + ";\n";
-    } else {
+    } else if (gpu_info.SupportsPointersInKernels()) {
       c += "  weights_cache = args.biases.GetPtr() + DST_S;\n";
     }
   }
@@ -1023,7 +1043,8 @@
     const std::string sind = std::to_string(s);
     c += "  if (DST_S + " + sind + " >= args.dst_tensor.Slices()) return;\n";
     c += "  {\n";
-    if (conv_params.AreWeightsBuffer()) {
+    if (conv_params.AreWeightsBuffer() &&
+        gpu_info.SupportsPointersInKernels()) {
       c += "    FLT4 bias_val = TO_FLT4(weights_cache[" + sind + "]);\n";
     } else {
       c += "    FLT4 bias_val = args.biases.Read(DST_S + " + sind + ");\n";
commit	042b8c59ff6ef6bf75c3d5197f5b53622f0f9659	[log] [tgz]
author	Raman Sarokin <sorokin@google.com>	Fri Jul 09 12:04:59 2021 -0700
committer	TensorFlower Gardener <gardener@tensorflow.org>	Fri Jul 09 12:14:17 2021 -0700
tree	d064706cead9194a0a3dad4f86ab03425dcb4554
parent	3ef67e3337328209b5d46736a7624366ff2a748a [diff]