SpatialMaxUnpooling.cu - platform/external/pytorch - Git at Google

 #include "THCUNN.h"
 #include "common.h"

 template <typename Dtype>
 __global__ void MaxUnpoolForward(const int nthreads, const Dtype* bottom_data, const Dtype* bottom_mask,
     const int num, const int channels, const int iheight, const int iwidth, const int oheight, const int owidth, Dtype* top_data) {
   CUDA_KERNEL_LOOP(index, nthreads) { //index here indices the input pixels
     int c = (index / iwidth / iheight) % channels;
     int n = index / iwidth / iheight / channels;
     top_data += (n*channels + c)*oheight*owidth;
     int maxind = bottom_mask[index]-1;

     top_data[maxind] = bottom_data[index];
   }
 }

 template <typename Dtype>
 __global__ void MaxUnpoolBackward(const int nthreads, const Dtype* top_diff, const Dtype* bottom_mask,
     const int num, const int channels, const int iheight, const int iwidth, const int oheight, const int owidth, Dtype* bottom_diff) {
   CUDA_KERNEL_LOOP(index, nthreads) {
     int c = (index / iwidth / iheight) % channels;
     int n = index / iwidth / iheight / channels;
     top_diff += (n*channels + c)*oheight*owidth;
     int maxind = bottom_mask[index]-1;

     bottom_diff[index] = top_diff[maxind];
   }
 }

 void THNN_CudaSpatialMaxUnpooling_updateOutput(THCState *state, THCudaTensor *input, THCudaTensor *output, THCudaTensor *indices, int owidth, int oheight)
 {
   THCUNN_assertSameGPU(state, 3, input, output, indices);
   THArgCheck(input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D (batch) tensor expected");

   long nInputCols, nInputRows, nInputPlane, batchSize;

   if (input->nDimension == 3) {
     nInputCols = input->size[2];
     nInputRows = input->size[1];
     nInputPlane = input->size[0];
     batchSize = 1;
   }
   else
   {
     nInputCols = input->size[3];
     nInputRows = input->size[2];
     nInputPlane = input->size[1];
     batchSize = input->size[0];
   }

   input = THCudaTensor_newContiguous(state, input);
   indices = THCudaTensor_newContiguous(state, indices);
   THCudaTensor_resize4d(state, output, batchSize, nInputPlane, oheight, owidth);
   THCudaTensor_zero(state, output);

   int count = THCudaTensor_nElement(state, input);

   MaxUnpoolForward <<< GET_BLOCKS(count), CUDA_NUM_THREADS, 0, THCState_getCurrentStream(state) >>>
       (count, THCudaTensor_data(state, input), THCudaTensor_data(state, indices),
       batchSize, nInputPlane, nInputRows, nInputCols, oheight, owidth, THCudaTensor_data(state, output));

   if(input->nDimension == 3)
     THCudaTensor_resize3d(state, output, nInputPlane, oheight, owidth);

   THCudaTensor_free(state, input);

   // check for errors
   cudaError_t err = cudaGetLastError();
   if (err != cudaSuccess) {
     printf("error in SpatialMaxUnpooling.updateOutput: %s\n", cudaGetErrorString(err));
     THError("aborting");
   }
 }

 void THNN_CudaSpatialMaxUnpooling_updateGradInput(THCState *state, THCudaTensor *input, THCudaTensor *gradOutput, THCudaTensor *gradInput, THCudaTensor *indices, int owidth, int oheight)
 {
   THCUNN_assertSameGPU(state, 4, input, gradOutput, indices, gradInput);

   long nInputCols, nInputRows, nInputPlane, batchSize;

   if (input->nDimension == 3) {
     nInputCols = input->size[2];
     nInputRows = input->size[1];
     nInputPlane = input->size[0];
     batchSize = 1;
   }
   else
   {
     nInputCols = input->size[3];
     nInputRows = input->size[2];
     nInputPlane = input->size[1];
     batchSize = input->size[0];
   }

   input = THCudaTensor_newContiguous(state, input);
   indices = THCudaTensor_newContiguous(state, indices);
   gradOutput = THCudaTensor_newContiguous(state, gradOutput);
   THCudaTensor_resizeAs(state, gradInput, input);

   int count = THCudaTensor_nElement(state, input);

   MaxUnpoolBackward <<< GET_BLOCKS(count), CUDA_NUM_THREADS, 0, THCState_getCurrentStream(state) >>>
       (count, THCudaTensor_data(state, gradOutput), THCudaTensor_data(state, indices),
       batchSize, nInputPlane, nInputRows, nInputCols, oheight, owidth, THCudaTensor_data(state, gradInput));

   // check for errors
   cudaError_t err = cudaGetLastError();
   if (err != cudaSuccess) {
     printf("error in SpatialMaxUnpooling.updateGradInput: %s\n", cudaGetErrorString(err));
     THError("aborting");
   }
   // clean
   THCudaTensor_free(state, input);
   THCudaTensor_free(state, gradOutput);
 }
	#include "THCUNN.h"
	#include "common.h"

	template <typename Dtype>
	__global__ void MaxUnpoolForward(const int nthreads, const Dtype* bottom_data, const Dtype* bottom_mask,
	const int num, const int channels, const int iheight, const int iwidth, const int oheight, const int owidth, Dtype* top_data) {
	CUDA_KERNEL_LOOP(index, nthreads) { //index here indices the input pixels
	int c = (index / iwidth / iheight) % channels;
	int n = index / iwidth / iheight / channels;
	top_data += (nchannels + c)oheight*owidth;
	int maxind = bottom_mask[index]-1;

	top_data[maxind] = bottom_data[index];
	}
	}

	template <typename Dtype>
	__global__ void MaxUnpoolBackward(const int nthreads, const Dtype* top_diff, const Dtype* bottom_mask,
	const int num, const int channels, const int iheight, const int iwidth, const int oheight, const int owidth, Dtype* bottom_diff) {
	CUDA_KERNEL_LOOP(index, nthreads) {
	int c = (index / iwidth / iheight) % channels;
	int n = index / iwidth / iheight / channels;
	top_diff += (nchannels + c)oheight*owidth;
	int maxind = bottom_mask[index]-1;

	bottom_diff[index] = top_diff[maxind];
	}
	}

	void THNN_CudaSpatialMaxUnpooling_updateOutput(THCState state, THCudaTensor input, THCudaTensor output, THCudaTensor indices, int owidth, int oheight)
	{
	THCUNN_assertSameGPU(state, 3, input, output, indices);
	THArgCheck(input->nDimension == 3 \|\| input->nDimension == 4, 2, "3D or 4D (batch) tensor expected");

	long nInputCols, nInputRows, nInputPlane, batchSize;

	if (input->nDimension == 3) {
	nInputCols = input->size[2];
	nInputRows = input->size[1];
	nInputPlane = input->size[0];
	batchSize = 1;
	}
	else
	{
	nInputCols = input->size[3];
	nInputRows = input->size[2];
	nInputPlane = input->size[1];
	batchSize = input->size[0];
	}

	input = THCudaTensor_newContiguous(state, input);
	indices = THCudaTensor_newContiguous(state, indices);
	THCudaTensor_resize4d(state, output, batchSize, nInputPlane, oheight, owidth);
	THCudaTensor_zero(state, output);

	int count = THCudaTensor_nElement(state, input);

	MaxUnpoolForward <<< GET_BLOCKS(count), CUDA_NUM_THREADS, 0, THCState_getCurrentStream(state) >>>
	(count, THCudaTensor_data(state, input), THCudaTensor_data(state, indices),
	batchSize, nInputPlane, nInputRows, nInputCols, oheight, owidth, THCudaTensor_data(state, output));

	if(input->nDimension == 3)
	THCudaTensor_resize3d(state, output, nInputPlane, oheight, owidth);

	THCudaTensor_free(state, input);

	// check for errors
	cudaError_t err = cudaGetLastError();
	if (err != cudaSuccess) {
	printf("error in SpatialMaxUnpooling.updateOutput: %s\n", cudaGetErrorString(err));
	THError("aborting");
	}
	}

	void THNN_CudaSpatialMaxUnpooling_updateGradInput(THCState state, THCudaTensor input, THCudaTensor gradOutput, THCudaTensor gradInput, THCudaTensor *indices, int owidth, int oheight)
	{
	THCUNN_assertSameGPU(state, 4, input, gradOutput, indices, gradInput);

	long nInputCols, nInputRows, nInputPlane, batchSize;

	if (input->nDimension == 3) {
	nInputCols = input->size[2];
	nInputRows = input->size[1];
	nInputPlane = input->size[0];
	batchSize = 1;
	}
	else
	{
	nInputCols = input->size[3];
	nInputRows = input->size[2];
	nInputPlane = input->size[1];
	batchSize = input->size[0];
	}

	input = THCudaTensor_newContiguous(state, input);
	indices = THCudaTensor_newContiguous(state, indices);
	gradOutput = THCudaTensor_newContiguous(state, gradOutput);
	THCudaTensor_resizeAs(state, gradInput, input);

	int count = THCudaTensor_nElement(state, input);

	MaxUnpoolBackward <<< GET_BLOCKS(count), CUDA_NUM_THREADS, 0, THCState_getCurrentStream(state) >>>
	(count, THCudaTensor_data(state, gradOutput), THCudaTensor_data(state, indices),
	batchSize, nInputPlane, nInputRows, nInputCols, oheight, owidth, THCudaTensor_data(state, gradInput));

	// check for errors
	cudaError_t err = cudaGetLastError();
	if (err != cudaSuccess) {
	printf("error in SpatialMaxUnpooling.updateGradInput: %s\n", cudaGetErrorString(err));
	THError("aborting");
	}
	// clean
	THCudaTensor_free(state, input);
	THCudaTensor_free(state, gradOutput);
	}