make ATen/native/cuda/AdaptiveMaxPooling2d.cu data_ptr-correct (#99164)
Switch the kernels and dispatch sites in AdaptiveMaxPooling2d.cu to const_data_ptr() / mutable_data_ptr(), and const-qualify the kernel pointer parameters that are only read.
Test Plan: Rely on CI.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/99164
Approved by: https://github.com/ezyang
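
For reference, the data_ptr-correct pattern used throughout this series: tensors that are only read are accessed through const_data_ptr<T>(), and the resulting const T* is threaded through the kernel signature; tensors that are written use mutable_data_ptr<T>(). Below is a minimal sketch of the pattern, assuming contiguous, same-sized CUDA tensors; the kernel and function names are illustrative, not from this file.

#include <ATen/core/Tensor.h>
#include <ATen/Dispatch.h>
#include <ATen/cuda/CUDAContext.h>
#include <c10/cuda/CUDAException.h>
#include <cstdint>

// Read-only data comes in as const T*; only the written tensor keeps a
// mutable pointer.
template <typename T>
__global__ void copy_kernel(const T* in, T* out, int64_t n) {
  int64_t i = blockIdx.x * (int64_t)blockDim.x + threadIdx.x;
  if (i < n) {
    out[i] = in[i];
  }
}

// Host side: const_data_ptr for inputs, mutable_data_ptr for outputs.
void launch_copy(const at::Tensor& input, at::Tensor& output) {
  AT_DISPATCH_FLOATING_TYPES_AND2(
      at::kHalf, at::kBFloat16, input.scalar_type(), "copy_kernel", [&] {
        const scalar_t* in = input.const_data_ptr<scalar_t>();
        scalar_t* out = output.mutable_data_ptr<scalar_t>();
        const int64_t n = input.numel();
        const int threads = 256;
        const int blocks = (int)((n + threads - 1) / threads);
        copy_kernel<scalar_t><<<blocks, threads, 0,
                                at::cuda::getCurrentCUDAStream()>>>(in, out, n);
        C10_CUDA_KERNEL_LAUNCH_CHECK();
      });
}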
diff --git a/aten/src/ATen/native/cuda/AdaptiveMaxPooling2d.cu b/aten/src/ATen/native/cuda/AdaptiveMaxPooling2d.cu
index bf25a09..d8fab31 100644
--- a/aten/src/ATen/native/cuda/AdaptiveMaxPooling2d.cu
+++ b/aten/src/ATen/native/cuda/AdaptiveMaxPooling2d.cu
@@ -43,7 +43,7 @@
* 4D input, 4D output, 4D argmax x and y
*/
template <typename T>
-__global__ void adaptivemaxpool(T *input, T *output, int64_t *indices,
+__global__ void adaptivemaxpool(const T *input, T *output, int64_t *indices,
int isizeH, int isizeW,
int osizeH, int osizeW,
int64_t istrideD, int64_t istrideH, int64_t istrideW)
@@ -81,7 +81,7 @@
int kW = iendW - istartW;
// Compute the max of the input window...
- T *ptr_input = input + istartH*istrideH + istartW*istrideW;
+ const T *ptr_input = input + istartH*istrideH + istartW*istrideW;
T *ptr_output = output + oh*osizeW + ow;
int64_t *ptr_ind = indices + oh*osizeW + ow;
int argmax = istartH * isizeW + istartW;
@@ -109,7 +109,7 @@
* this function computes the gradInput from indices and gradOutput
*/
template <typename T>
-__global__ void adaptivemaxgradinput(T *gradInput, T *gradOutput, int64_t *indices,
+__global__ void adaptivemaxgradinput(T *gradInput, const T *gradOutput, const int64_t *indices,
int isizeH, int isizeW,
int osizeH, int osizeW)
{
@@ -139,8 +139,8 @@
for(ow = ostartW; ow < oendW; ow += ostepW) {
- T *ptr_gradOutput = gradOutput + oh*osizeW + ow;
- int64_t *ptr_ind = indices + oh*osizeW + ow;
+ const T *ptr_gradOutput = gradOutput + oh*osizeW + ow;
+ const int64_t *ptr_ind = indices + oh*osizeW + ow;
T z = *ptr_gradOutput;
int argmax = (*ptr_ind);
@@ -157,7 +157,7 @@
*/
template <typename T>
__global__ void atomicadaptivemaxgradinput(
- T *gradInput, T *gradOutput, int64_t *indices,
+ T *gradInput, const T *gradOutput, const int64_t *indices,
int isizeH, int isizeW, int osizeH, int osizeW
)
{
@@ -186,8 +186,8 @@
for(ow = ostartW; ow < oendW; ow += ostepW) {
- T *ptr_gradOutput = gradOutput + oh*osizeW + ow;
- int64_t *ptr_ind = indices + oh*osizeW + ow;
+ const T *ptr_gradOutput = gradOutput + oh*osizeW + ow;
+ const int64_t *ptr_ind = indices + oh*osizeW + ow;
T z = *ptr_gradOutput;
int argmax = (*ptr_ind);
@@ -233,9 +233,9 @@
AT_DISPATCH_FLOATING_TYPES_AND2(
kHalf, kBFloat16, input.scalar_type(), "adaptive_max_pool2d_cuda", [&] {
- scalar_t* input_data = input.data_ptr<scalar_t>();
- scalar_t* output_data = output_c.data_ptr<scalar_t>();
- int64_t* indices_data = indices_c.data_ptr<int64_t>();
+ const scalar_t* input_data = input.const_data_ptr<scalar_t>();
+ scalar_t* output_data = output_c.mutable_data_ptr<scalar_t>();
+ int64_t* indices_data = indices_c.mutable_data_ptr<int64_t>();
// cuda blocks & threads:
int blocksH = (int)(16L / sizeD);
@@ -278,9 +278,9 @@
input_.scalar_type(),
"adaptive_max_pool2d_cuda",
[&] {
- scalar_t* input_data = input_.data_ptr<scalar_t>();
- scalar_t* output_data = output_c.data_ptr<scalar_t>();
- int64_t* indices_data = indices_c.data_ptr<int64_t>();
+ const scalar_t* input_data = input_.const_data_ptr<scalar_t>();
+ scalar_t* output_data = output_c.mutable_data_ptr<scalar_t>();
+ int64_t* indices_data = indices_c.mutable_data_ptr<int64_t>();
// cuda blocks & threads:
int blocksH = (int)(16L / sizeD);
@@ -362,9 +362,9 @@
input.scalar_type(),
"adaptive_max_pool2d_backward_cuda",
[&] {
- scalar_t* gradInput_data = gradInput_c.data_ptr<scalar_t>();
- scalar_t* gradOutput_data = gradOutput_.data_ptr<scalar_t>();
- int64_t* indices_data = indices_.data_ptr<int64_t>();
+ scalar_t* gradInput_data = gradInput_c.mutable_data_ptr<scalar_t>();
+ const scalar_t* gradOutput_data = gradOutput_.const_data_ptr<scalar_t>();
+ const int64_t* indices_data = indices_.const_data_ptr<int64_t>();
// cuda blocks & threads:
int blocksH = (int)(16L / sizeD);
@@ -423,9 +423,9 @@
input.scalar_type(),
"adaptive_max_pool2d_backward_cuda",
[&] {
- scalar_t* gradInput_data = gradInput_c.data_ptr<scalar_t>();
- scalar_t* gradOutput_data = gradOutput_.data_ptr<scalar_t>();
- int64_t* indices_data = indices_.data_ptr<int64_t>();
+ scalar_t* gradInput_data = gradInput_c.mutable_data_ptr<scalar_t>();
+ const scalar_t* gradOutput_data = gradOutput_.const_data_ptr<scalar_t>();
+ const int64_t* indices_data = indices_.const_data_ptr<int64_t>();
// cuda blocks & threads:
int blocksH = (int)(16L / sizeD);
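
Note the asymmetry in the backward kernels above: gradInput keeps a mutable T* in both adaptivemaxgradinput and atomicadaptivemaxgradinput because it is the tensor being written, while gradOutput and indices become const since they are only read. A hedged sketch of the atomic scatter that motivates the non-const gradInput, using illustrative names and plain float / atomicAdd instead of the dispatched scalar types:

#include <cstdint>

// Each thread reads one gradOutput value and accumulates it into gradInput
// at the argmax position recorded by the forward pass. The atomic add is
// needed when overlapping pooling windows can route several output
// elements to the same input element.
__global__ void scatter_max_grad(float* gradInput,         // written
                                 const float* gradOutput,  // read-only
                                 const int64_t* indices,   // read-only
                                 int64_t n_output) {
  int64_t i = blockIdx.x * (int64_t)blockDim.x + threadIdx.x;
  if (i < n_output) {
    atomicAdd(&gradInput[indices[i]], gradOutput[i]);
  }
}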