/* blob: 1d25e8f10fc80a88f5fb4f45491440fa99f14a27 [file] [log] [blame] */
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/SpatialDilatedMaxPooling.c"
#else
/*
 * Validate the arguments of SpatialDilatedMaxPooling and, when provided,
 * the shapes of gradOutput and indices against the expected output shape.
 *
 *  input        3D or 4D tensor. For 4D input the plane/height/width
 *               dimensions are 1/2/3, otherwise 0/1/2.
 *  gradOutput   may be NULL; otherwise its plane/height/width sizes must
 *               equal the computed output shape.
 *  indices      may be NULL; otherwise checked the same way as gradOutput.
 *  kH, kW       kernel size, must be > 0.
 *  dH, dW       stride, must be > 0.
 *  padH, padW   padding, must be at most kH/2 and kW/2 (integer division).
 *  dilationH,
 *  dilationW    dilation, must be > 0.
 *  ceil_mode    round the output size up (ceil) instead of down (floor).
 *
 * Violations are reported through THArgCheck / THNN_ARGCHECK / THError.
 */
static inline void THNN_(SpatialDilatedMaxPooling_shapeCheck)(
  THTensor *input, THTensor *gradOutput, THIndexTensor *indices,
  int kH, int kW, int dH, int dW, int padH, int padW,
  int dilationH, int dilationW, bool ceil_mode) {

  THArgCheck(kW > 0 && kH > 0, 5,
             "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW);
  THArgCheck(dW > 0 && dH > 0, 8,
             "stride should be greater than zero, but got dH: %d dW: %d", dH, dW);
  /* The frame kernel advances its window coordinates by the dilation; a
     non-positive dilation would keep those loops from terminating. */
  THArgCheck(dilationH > 0 && dilationW > 0, 12,
             "dilation should be greater than zero, but got dilationH: %d dilationW: %d",
             dilationH, dilationW);

  int ndim = input->nDimension;
  int dimf = 0;
  int dimh = 1;
  int dimw = 2;

  /* Batched input: skip the leading dimension. */
  if (ndim == 4) {
    dimf++;
    dimh++;
    dimw++;
  }

  THNN_ARGCHECK(ndim == 3 || ndim == 4, 2, input,
                "3D or 4D input tensor expected but got: %s");

  long nInputPlane = input->size[dimh-1];
  long inputHeight = input->size[dimh];
  long inputWidth = input->size[dimw];
  long outputHeight, outputWidth;
  long nOutputPlane = nInputPlane;

  /* The sizes are long, so they are printed with %ld. */
  THArgCheck(inputWidth >= kW - padW && inputHeight >= kH - padH, 2,
             "input image (H: %ld, W: %ld) smaller than kernel "
             "size - padding( kH: %d padH: %d kW: %d padW: %d",
             inputHeight, inputWidth, kH, padH, kW, padW);
  THArgCheck(kW/2 >= padW && kH/2 >= padH, 2,
             "pad should be smaller than half of kernel size, but got "
             "padW = %d, padH = %d, kW = %d, kH = %d",
             padW, padH, kW, kH);

  /* Output extent: the dilated kernel spans dilation * (k - 1) + 1 input
     elements; the remaining padded extent is divided by the stride. */
  if (ceil_mode)
  {
    outputHeight = (long)(ceil((float)(inputHeight - (dilationH * (kH - 1) + 1) + 2*padH) / dH)) + 1;
    outputWidth  = (long)(ceil((float)(inputWidth  - (dilationW * (kW - 1) + 1) + 2*padW) / dW)) + 1;
  }
  else
  {
    outputHeight = (long)(floor((float)(inputHeight - (dilationH * (kH - 1) + 1) + 2*padH) / dH)) + 1;
    outputWidth  = (long)(floor((float)(inputWidth  - (dilationW * (kW - 1) + 1) + 2*padW) / dW)) + 1;
  }

  /* Apply the same correction as updateOutput: the last pooling window must
     start inside the image. Without it, the shape computed here can be one
     larger than the shape the forward pass actually produced, and a valid
     gradOutput / indices pair would be rejected below. */
  if (padW || padH)
  {
    if ((outputHeight - 1)*dH >= inputHeight + padH)
      --outputHeight;
    if ((outputWidth - 1)*dW >= inputWidth + padW)
      --outputWidth;
  }

  if (outputWidth < 1 || outputHeight < 1)
    THError("Given input size: (%ldx%ldx%ld). "
            "Calculated output size: (%ldx%ldx%ld). Output size is too small",
            nInputPlane,inputHeight,inputWidth,nInputPlane,outputHeight,outputWidth);

  if (gradOutput != NULL) {
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimf, nOutputPlane);
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimh, outputHeight);
    THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimw, outputWidth);
  }
  if (indices != NULL) {
    THNN_CHECK_DIM_SIZE_INDICES(indices, ndim, dimf, nOutputPlane);
    THNN_CHECK_DIM_SIZE_INDICES(indices, ndim, dimh, outputHeight);
    THNN_CHECK_DIM_SIZE_INDICES(indices, ndim, dimw, outputWidth);
  }
}
/*
 * Max-pool one frame (nslices planes of iheight x iwidth values) into
 * output_p, recording for every output element the flat position
 * (y*iwidth + x, plus TH_INDEX_BASE) of the selected input element in ind_p.
 *
 *  input_p    nslices planes of iwidth*iheight values, addressed contiguously.
 *  output_p   nslices planes of owidth*oheight values, written here.
 *  ind_p      same layout as output_p, receives the positions of the maxima.
 *  kW, kH     kernel size.
 *  dW, dH     stride.
 *  padW, padH padding.
 *  dilationW,
 *  dilationH  step between the sampled elements of a window.
 *
 * If no element of a window compares greater than -THInf, the output value
 * is -THInf and the stored index is -1 + TH_INDEX_BASE.
 */
static void THNN_(SpatialDilatedMaxPooling_updateOutput_frame)(
          real *input_p,
          real *output_p,
          THIndex_t *ind_p,
          long nslices,
          long iwidth,
          long iheight,
          long owidth,
          long oheight,
          int kW,
          int kH,
          int dW,
          int dH,
          int padW,
          int padH,
          int dilationW,
          int dilationH
          )
{
  long k;
#pragma omp parallel for private(k)
  for (k = 0; k < nslices; k++)
  {
    /* loop over output */
    long i, j;
    real *ip = input_p + k*iwidth*iheight;
    for(i = 0; i < oheight; i++)
    {
      for(j = 0; j < owidth; j++)
      {
        long hstart = i * dH - padH;
        long wstart = j * dW - padW;
        /* Exclusive window end, computed from the unclamped start and then
           limited to the image. The clamp is done in integer arithmetic so
           large coordinates are not rounded by a conversion to float. */
        long hend = hstart + (kH - 1) * dilationH + 1;
        long wend = wstart + (kW - 1) * dilationW + 1;
        if (hend > iheight)
          hend = iheight;
        if (wend > iwidth)
          wend = iwidth;
        /* Step over the padded region in dilation-sized increments so the
           sampling grid of the window is preserved. */
        while(hstart < 0)
          hstart += dilationH;
        while(wstart < 0)
          wstart += dilationW;

        /* local pointers */
        real *op = output_p + k*owidth*oheight + i*owidth + j;
        THIndex_t *indp = ind_p + k*owidth*oheight + i*owidth + j;

        /* compute local max: */
        long maxindex = -1;
        real maxval = -THInf;
        long tcntr = 0;
        long x,y;
        for(y = hstart; y < hend; y += dilationH)
        {
          for(x = wstart; x < wend; x += dilationW)
          {
            tcntr = y*iwidth + x;
            real val = *(ip + tcntr);
            if (val > maxval)
            {
              maxval = val;
              maxindex = tcntr;
            }
          }
        }

        /* set output to local max */
        *op = maxval;

        /* store location of max */
        *indp = maxindex + TH_INDEX_BASE;
      }
    }
  }
}
/*
 * Forward pass of dilated spatial max pooling.
 *
 *  state       not referenced by this function.
 *  input       3D or 4D tensor; a 4D tensor is processed one leading-dimension
 *              slice at a time.
 *  output      resized here to the pooled shape and filled with the maxima.
 *  indices     resized like output; receives the location of each maximum.
 *  kW, kH      kernel size.
 *  dW, dH      stride.
 *  padW, padH  padding.
 *  dilationW,
 *  dilationH   dilation.
 *  ceil_mode   round the output size up instead of down.
 */
void THNN_(SpatialDilatedMaxPooling_updateOutput)(
          THNNState *state,
          THTensor *input,
          THTensor *output,
          THIndexTensor *indices,
          int kW,
          int kH,
          int dW,
          int dH,
          int padW,
          int padH,
          int dilationW,
          int dilationH,
          bool ceil_mode)
{
  THNN_(SpatialDilatedMaxPooling_shapeCheck)
    (input, NULL, NULL, kH, kW, dH, dW,
     padH, padW, dilationH, dilationW, ceil_mode);

  /* Locate the spatial dimensions; a 4D input carries a leading batch dimension. */
  const int hasBatchDim = (input->nDimension == 4);
  const int hAxis = hasBatchDim ? 2 : 1;
  const int wAxis = hasBatchDim ? 3 : 2;
  const long batchCount = hasBatchDim ? input->size[0] : 1;

  const long planes = input->size[hAxis - 1];
  const long inH = input->size[hAxis];
  const long inW = input->size[wAxis];

  /* Padded input extent left over once one dilated kernel has been placed. */
  const long spanH = inH - (dilationH * (kH - 1) + 1) + 2*padH;
  const long spanW = inW - (dilationW * (kW - 1) + 1) + 2*padW;

  long outH = (long)(ceil_mode ? ceil((float)spanH / dH) : floor((float)spanH / dH)) + 1;
  long outW = (long)(ceil_mode ? ceil((float)spanW / dW) : floor((float)spanW / dW)) + 1;

  if (padH != 0 || padW != 0)
  {
    /* ensure that the last pooling starts inside the image */
    if ((outH - 1)*dH >= inH + padH)
      outH -= 1;
    if ((outW - 1)*dW >= inW + padW)
      outW -= 1;
  }

  /* Work on a contiguous copy/reference of the input; released at the end. */
  input = THTensor_(newContiguous)(input);
  const int singleFrame = (input->nDimension == 3);

  /* Size the output and the indices (one location per output point). */
  if (singleFrame)
  {
    THTensor_(resize3d)(output, planes, outH, outW);
    THIndexTensor_(resize3d)(indices, planes, outH, outW);
  }
  else
  {
    THTensor_(resize4d)(output, batchCount, planes, outH, outW);
    THIndexTensor_(resize4d)(indices, batchCount, planes, outH, outW);
  }

  /* Raw pointers are fetched only after the resizes above. */
  real *src = THTensor_(data)(input);
  real *dst = THTensor_(data)(output);
  THIndex_t *loc = THIndexTensor_(data)(indices);

  if (singleFrame)
  {
    THNN_(SpatialDilatedMaxPooling_updateOutput_frame)
      (src, dst, loc,
       planes,
       inW, inH,
       outW, outH,
       kW, kH, dW, dH,
       padW, padH,
       dilationW, dilationH);
  }
  else
  {
    /* Element counts of one batch entry in the input and in the output. */
    const long srcStep = planes*inW*inH;
    const long dstStep = planes*outW*outH;
    long b;
#pragma omp parallel for private(b)
    for (b = 0; b < batchCount; b++)
    {
      THNN_(SpatialDilatedMaxPooling_updateOutput_frame)
        (src + b*srcStep,
         dst + b*dstStep,
         loc + b*dstStep,
         planes,
         inW, inH,
         outW, outH,
         kW, kH, dW, dH,
         padW, padH,
         dilationW, dilationH);
    }
  }

  /* cleanup */
  THTensor_(free)(input);
}
/*
 * Backward pass for one frame: every gradOutput element is added to the
 * gradInput element whose position was recorded in ind_p.
 *
 *  gradInput_p   nInputPlane planes of inputWidth*inputHeight values,
 *                accumulated into (the caller zeroes it beforehand).
 *  gradOutput_p  nInputPlane planes of outputWidth*outputHeight values.
 *  ind_p         same layout as gradOutput_p; flat positions within a plane,
 *                offset by TH_INDEX_BASE.
 *  dW, dH        not used by this function.
 */
static void THNN_(SpatialDilatedMaxPooling_updateGradInput_frame)(
          real *gradInput_p,
          real *gradOutput_p,
          THIndex_t *ind_p,
          long nInputPlane,
          long inputWidth,
          long inputHeight,
          long outputWidth,
          long outputHeight,
          int dW,
          int dH)
{
  long k;
#pragma omp parallel for private(k)
  for (k = 0; k < nInputPlane; k++)
  {
    real *gradInput_p_k = gradInput_p + k*inputWidth*inputHeight;
    real *gradOutput_p_k = gradOutput_p + k*outputWidth*outputHeight;
    THIndex_t *ind_p_k = ind_p + k*outputWidth*outputHeight;

    /* calculate max points */
    long i, j;
    for(i = 0; i < outputHeight; i++)
    {
      for(j = 0; j < outputWidth; j++)
      {
        /* retrieve position of max */
        long maxp = ind_p_k[i*outputWidth + j] - TH_INDEX_BASE;

        /* The forward kernel records position -1 when no window element
           compared greater than -THInf. Such an entry has no input element
           to receive the gradient, and writing through it would land
           before the start of the plane. */
        if (maxp < 0)
        {
          continue;
        }

        /* update gradient */
        gradInput_p_k[maxp] += gradOutput_p_k[i*outputWidth + j];
      }
    }
  }
}
/*
 * Backward pass of dilated spatial max pooling.
 *
 *  state       not referenced by this function.
 *  input       3D or 4D tensor that was given to the forward pass.
 *  gradOutput  gradient with respect to the output; a contiguous version is
 *              obtained and released inside this function.
 *  gradInput   resized to the shape of input, zeroed, then accumulated into.
 *  indices     locations of the maxima recorded by the forward pass.
 *  kW, kH, dW, dH, padW, padH, dilationW, dilationH, ceil_mode
 *              pooling parameters; used for shape validation, and dW/dH are
 *              also forwarded to the frame kernel.
 */
void THNN_(SpatialDilatedMaxPooling_updateGradInput)(
          THNNState *state,
          THTensor *input,
          THTensor *gradOutput,
          THTensor *gradInput,
          THIndexTensor *indices,
          int kW,
          int kH,
          int dW,
          int dH,
          int padW,
          int padH,
          int dilationW,
          int dilationH,
          bool ceil_mode)
{
  int dimw = 2;
  int dimh = 1;
  long nbatch = 1;
  /* Sizes are kept as long, matching updateOutput and the frame kernels,
     so that tensor sizes are not narrowed to int. */
  long nInputPlane;
  long inputHeight;
  long inputWidth;
  long outputHeight;
  long outputWidth;
  real *gradInput_data;
  real *gradOutput_data;
  THIndex_t *indices_data;

  THNN_(SpatialDilatedMaxPooling_shapeCheck)
    (input, gradOutput, indices, kH, kW, dH, dW,
     padH, padW, dilationH, dilationW, ceil_mode);

  /* get contiguous gradOutput */
  gradOutput = THTensor_(newContiguous)(gradOutput);

  /* resize */
  THTensor_(resizeAs)(gradInput, input);
  THTensor_(zero)(gradInput);

  /* Batched input: the spatial dimensions shift by one. */
  if (input->nDimension == 4) {
    nbatch = input->size[0];
    dimw++;
    dimh++;
  }

  /* sizes */
  nInputPlane = input->size[dimh-1];
  inputHeight = input->size[dimh];
  inputWidth = input->size[dimw];
  outputHeight = gradOutput->size[dimh];
  outputWidth = gradOutput->size[dimw];

  /* get raw pointers */
  /* NOTE(review): the pointer arithmetic below assumes gradInput and indices
     are laid out contiguously; only gradOutput is made contiguous here -
     confirm against callers. */
  gradInput_data = THTensor_(data)(gradInput);
  gradOutput_data = THTensor_(data)(gradOutput);
  indices_data = THIndexTensor_(data)(indices);

  /* backprop */
  if (input->nDimension == 3)
  {
    THNN_(SpatialDilatedMaxPooling_updateGradInput_frame)
      (gradInput_data, gradOutput_data,
       indices_data,
       nInputPlane,
       inputWidth, inputHeight,
       outputWidth, outputHeight,
       dW, dH);
  }
  else
  {
    long p;
#pragma omp parallel for private(p)
    for (p = 0; p < nbatch; p++)
    {
      THNN_(SpatialDilatedMaxPooling_updateGradInput_frame)
        (gradInput_data+p*nInputPlane*inputWidth*inputHeight,
         gradOutput_data+p*nInputPlane*outputWidth*outputHeight,
         indices_data+p*nInputPlane*outputWidth*outputHeight,
         nInputPlane,
         inputWidth, inputHeight,
         outputWidth, outputHeight,
         dW, dH);
    }
  }

  /* cleanup */
  THTensor_(free)(gradOutput);
}
#endif