make ATen/native/cuda/AdaptiveMaxPooling2d.cu data_ptr-correct (#99164)
Switch the kernels and dispatch sites in AdaptiveMaxPooling2d.cu to const_data_ptr() / mutable_data_ptr(), and const-qualify the kernel pointer parameters that are only read.
Test Plan: Rely on CI.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/99164
Approved by: https://github.com/ezyang
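
For reference, the data_ptr-correct pattern used throughout this series: tensors that are only read are accessed through const_data_ptr<T>(), and the resulting const T* is threaded through the kernel signature; tensors that are written use mutable_data_ptr<T>(). Below is a minimal sketch of the pattern, assuming contiguous, same-sized CUDA tensors; the kernel and function names are illustrative, not from this file.

#include <ATen/core/Tensor.h>
#include <ATen/Dispatch.h>
#include <ATen/cuda/CUDAContext.h>
#include <c10/cuda/CUDAException.h>
#include <cstdint>

// Read-only data comes in as const T*; only the written tensor keeps a
// mutable pointer.
template <typename T>
__global__ void copy_kernel(const T* in, T* out, int64_t n) {
  int64_t i = blockIdx.x * (int64_t)blockDim.x + threadIdx.x;
  if (i < n) {
    out[i] = in[i];
  }
}

// Host side: const_data_ptr for inputs, mutable_data_ptr for outputs.
void launch_copy(const at::Tensor& input, at::Tensor& output) {
  AT_DISPATCH_FLOATING_TYPES_AND2(
      at::kHalf, at::kBFloat16, input.scalar_type(), "copy_kernel", [&] {
        const scalar_t* in = input.const_data_ptr<scalar_t>();
        scalar_t* out = output.mutable_data_ptr<scalar_t>();
        const int64_t n = input.numel();
        const int threads = 256;
        const int blocks = (int)((n + threads - 1) / threads);
        copy_kernel<scalar_t><<<blocks, threads, 0,
                                at::cuda::getCurrentCUDAStream()>>>(in, out, n);
        C10_CUDA_KERNEL_LAUNCH_CHECK();
      });
}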
diff --git a/aten/src/ATen/native/cuda/AdaptiveMaxPooling2d.cu b/aten/src/ATen/native/cuda/AdaptiveMaxPooling2d.cu
index bf25a09..d8fab31 100644
--- a/aten/src/ATen/native/cuda/AdaptiveMaxPooling2d.cu
+++ b/aten/src/ATen/native/cuda/AdaptiveMaxPooling2d.cu
@@ -43,7 +43,7 @@
* 4D input, 4D output, 4D argmax x and y
*/
template <typename T>
-__global__ void adaptivemaxpool(T *input, T *output, int64_t *indices,
+__global__ void adaptivemaxpool(const T *input, T *output, int64_t *indices,
int isizeH, int isizeW,
int osizeH, int osizeW,
int64_t istrideD, int64_t istrideH, int64_t istrideW)
@@ -81,7 +81,7 @@
int kW = iendW - istartW;
// Compute the max of the input window...
- T *ptr_input = input + istartH*istrideH + istartW*istrideW;
+ const T *ptr_input = input + istartH*istrideH + istartW*istrideW;
T *ptr_output = output + oh*osizeW + ow;
int64_t *ptr_ind = indices + oh*osizeW + ow;
int argmax = istartH * isizeW + istartW;
@@ -109,7 +109,7 @@
* this function computes the gradInput from indices and gradOutput
*/
template <typename T>
-__global__ void adaptivemaxgradinput(T *gradInput, T *gradOutput, int64_t *indices,
+__global__ void adaptivemaxgradinput(T *gradInput, const T *gradOutput, const int64_t *indices,
int isizeH, int isizeW,
int osizeH, int osizeW)
{
@@ -139,8 +139,8 @@
for(ow = ostartW; ow < oendW; ow += ostepW) {
- T *ptr_gradOutput = gradOutput + oh*osizeW + ow;
- int64_t *ptr_ind = indices + oh*osizeW + ow;
+ const T *ptr_gradOutput = gradOutput + oh*osizeW + ow;
+ const int64_t *ptr_ind = indices + oh*osizeW + ow;
T z = *ptr_gradOutput;
int argmax = (*ptr_ind);
@@ -157,7 +157,7 @@
*/
template <typename T>
__global__ void atomicadaptivemaxgradinput(
- T *gradInput, T *gradOutput, int64_t *indices,
+ T *gradInput, const T *gradOutput, const int64_t *indices,
int isizeH, int isizeW, int osizeH, int osizeW
)
{
@@ -186,8 +186,8 @@
for(ow = ostartW; ow < oendW; ow += ostepW) {
- T *ptr_gradOutput = gradOutput + oh*osizeW + ow;
- int64_t *ptr_ind = indices + oh*osizeW + ow;
+ const T *ptr_gradOutput = gradOutput + oh*osizeW + ow;
+ const int64_t *ptr_ind = indices + oh*osizeW + ow;
T z = *ptr_gradOutput;
int argmax = (*ptr_ind);
@@ -233,9 +233,9 @@
AT_DISPATCH_FLOATING_TYPES_AND2(
kHalf, kBFloat16, input.scalar_type(), "adaptive_max_pool2d_cuda", [&] {
- scalar_t* input_data = input.data_ptr<scalar_t>();
- scalar_t* output_data = output_c.data_ptr<scalar_t>();
- int64_t* indices_data = indices_c.data_ptr<int64_t>();
+ const scalar_t* input_data = input.const_data_ptr<scalar_t>();
+ scalar_t* output_data = output_c.mutable_data_ptr<scalar_t>();
+ int64_t* indices_data = indices_c.mutable_data_ptr<int64_t>();
// cuda blocks & threads:
int blocksH = (int)(16L / sizeD);
@@ -278,9 +278,9 @@
input_.scalar_type(),
"adaptive_max_pool2d_cuda",
[&] {
- scalar_t* input_data = input_.data_ptr<scalar_t>();
- scalar_t* output_data = output_c.data_ptr<scalar_t>();
- int64_t* indices_data = indices_c.data_ptr<int64_t>();
+ const scalar_t* input_data = input_.const_data_ptr<scalar_t>();
+ scalar_t* output_data = output_c.mutable_data_ptr<scalar_t>();
+ int64_t* indices_data = indices_c.mutable_data_ptr<int64_t>();
// cuda blocks & threads:
int blocksH = (int)(16L / sizeD);
@@ -362,9 +362,9 @@
input.scalar_type(),
"adaptive_max_pool2d_backward_cuda",
[&] {
- scalar_t* gradInput_data = gradInput_c.data_ptr<scalar_t>();
- scalar_t* gradOutput_data = gradOutput_.data_ptr<scalar_t>();
- int64_t* indices_data = indices_.data_ptr<int64_t>();
+ scalar_t* gradInput_data = gradInput_c.mutable_data_ptr<scalar_t>();
+ const scalar_t* gradOutput_data = gradOutput_.const_data_ptr<scalar_t>();
+ const int64_t* indices_data = indices_.const_data_ptr<int64_t>();
// cuda blocks & threads:
int blocksH = (int)(16L / sizeD);
@@ -423,9 +423,9 @@
input.scalar_type(),
"adaptive_max_pool2d_backward_cuda",
[&] {
- scalar_t* gradInput_data = gradInput_c.data_ptr<scalar_t>();
- scalar_t* gradOutput_data = gradOutput_.data_ptr<scalar_t>();
- int64_t* indices_data = indices_.data_ptr<int64_t>();
+ scalar_t* gradInput_data = gradInput_c.mutable_data_ptr<scalar_t>();
+ const scalar_t* gradOutput_data = gradOutput_.const_data_ptr<scalar_t>();
+ const int64_t* indices_data = indices_.const_data_ptr<int64_t>();
// cuda blocks & threads:
int blocksH = (int)(16L / sizeD);
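
Note the asymmetry in the backward kernels above: gradInput keeps a mutable T* in both adaptivemaxgradinput and atomicadaptivemaxgradinput because it is the tensor being written, while gradOutput and indices become const since they are only read. A hedged sketch of the atomic scatter that motivates the non-const gradInput, using illustrative names and plain float / atomicAdd instead of the dispatched scalar types:

#include <cstdint>

// Each thread reads one gradOutput value and accumulates it into gradInput
// at the argmax position recorded by the forward pass. The atomic add is
// needed when overlapping pooling windows can route several output
// elements to the same input element.
__global__ void scatter_max_grad(float* gradInput,         // written
                                 const float* gradOutput,  // read-only
                                 const int64_t* indices,   // read-only
                                 int64_t n_output) {
  int64_t i = blockIdx.x * (int64_t)blockDim.x + threadIdx.x;
  if (i < n_output) {
    atomicAdd(&gradInput[indices[i]], gradOutput[i]);
  }
}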