blob: 4c077bc64359be9547d1ce15b786207e47a36f90 [file] [log] [blame]
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/SpatialSubSampling.c"
#else
/*
 * Validates the arguments shared by the SpatialSubSampling entry points.
 *
 * input      : 3D or 4D tensor. For a 3D input the plane/height/width
 *              dimensions are 0/1/2; for a 4D input they are 1/2/3.
 *              The plane dimension must equal weight's size along
 *              dimension 0, and height/width must be at least kH/kW.
 * gradOutput : accepted so every caller can use the same signature; it is
 *              not inspected here (updateOutput passes NULL).
 * weight     : must be contiguous; size along dimension 0 is the expected
 *              number of input planes.
 * kW, kH     : kernel width and height.
 *
 * Violations are reported through THNN_ARGCHECK / THArgCheck.
 */
static inline void THNN_(SpatialSubSampling_shapeCheck)(
                         THTensor *input,
                         THTensor *gradOutput,
                         THTensor *weight,
                         int kW, int kH) {
  /* gradOutput is intentionally unused; silence unused-parameter warnings. */
  (void)gradOutput;

  THNN_ARGCHECK(input->nDimension == 3 || input->nDimension == 4, 2, input,
                "3D or 4D input tensor expected but got: %s");
  THArgCheck(THTensor_(isContiguous)(weight), 4, "weight must be contiguous");

  int nInputPlane = THTensor_(size)(weight, 0);

  int dimw = 2;
  int dimh = 1;

  long inputWidth;
  long inputHeight;

  /* A leading batch dimension shifts every spatial index by one. */
  if (input->nDimension == 4) {
    dimw++;
    dimh++;
  }

  inputWidth = input->size[dimw];
  inputHeight = input->size[dimh];

  /* The plane dimension sits immediately before the height dimension. */
  THArgCheck(input->size[dimh-1] == nInputPlane, 2, "invalid number of input planes");
  THArgCheck(inputWidth >= kW && inputHeight >= kH, 2, "input image smaller than kernel size");
}
/*
 * Forward pass of spatial sub-sampling.
 *
 * For every plane k and every output location, the output is
 *     bias[k] + weight[k] * (sum of the kH x kW input window)
 * where consecutive windows are dH rows / dW columns apart.
 *
 * state  : unused by this implementation.
 * input  : 3D (plane, height, width) or 4D (batch, plane, height, width).
 * output : resized to (plane, oH, oW) or (batch, plane, oH, oW), with
 *          oW = (inputWidth - kW) / dW + 1 and oH = (inputHeight - kH) / dH + 1.
 * weight : contiguous, one scalar per plane.
 * bias   : contiguous, one scalar per plane; a NULL bias is treated as zero.
 * kW, kH : kernel width / height.
 * dW, dH : horizontal / vertical stride, must be positive.
 */
void THNN_(SpatialSubSampling_updateOutput)(
           THNNState *state,
           THTensor *input,
           THTensor *output,
           THTensor *weight,
           THTensor *bias,
           int kW, int kH,
           int dW, int dH)
{
  THArgCheck(!bias || THTensor_(isContiguous)(bias), 5, "bias must be contiguous");

  real *weight_data = THTensor_(data)(weight);
  /* The check above accepts a NULL bias, so it must not be dereferenced. */
  real *bias_data = bias ? THTensor_(data)(bias) : NULL;
  real *output_data;
  real *input_data;

  int dimw = 2;
  int dimh = 1;
  long nbatch = 1;

  long inputWidth;
  long inputHeight;
  long outputWidth;
  long outputHeight;

  int nInputPlane = THTensor_(size)(weight,0);

  long k;

  THNN_(SpatialSubSampling_shapeCheck)(input, NULL, weight, kW, kH);
  /* The strides are used as divisors below; reject zero/negative values. */
  THArgCheck(dW > 0 && dH > 0, 8, "stride should be greater than zero");

  if (input->nDimension == 4) {
    nbatch = input->size[0];
    dimw++;
    dimh++;
  }

  inputWidth = input->size[dimw];
  inputHeight = input->size[dimh];
  outputWidth = (inputWidth - kW) / dW + 1;
  outputHeight = (inputHeight - kH) / dH + 1;

  if (input->nDimension == 3)
    THTensor_(resize3d)(output, nInputPlane, outputHeight, outputWidth);
  else
    THTensor_(resize4d)(output, input->size[0], nInputPlane, outputHeight, outputWidth);

  /* newContiguous returns a tensor that is released at the end of this function. */
  input = THTensor_(newContiguous)(input);
  input_data = THTensor_(data)(input);
  /* NOTE(review): output is indexed as a dense array below; this assumes the
     resize above leaves it contiguous -- confirm against THTensor resize. */
  output_data = THTensor_(data)(output);

#pragma omp parallel for private(k)
  for(k = 0; k < nInputPlane; k++)
  {
    long p;
    for(p = 0; p < nbatch; p++)
    {
      long xx, yy;
      /* Start of output plane k of sample p */
      real *ptr_output = output_data + p*nInputPlane*outputWidth*outputHeight + k*outputWidth*outputHeight;
      /* Per-plane scalar weight */
      real the_weight = weight_data[k];
      /* Initialize the whole output plane to the bias (zero when absent) */
      real z = bias_data ? bias_data[k] : 0;
      long i;
      for(i = 0; i < outputWidth*outputHeight; i++)
        ptr_output[i] = z;

      for(yy = 0; yy < outputHeight; yy++)
      {
        for(xx = 0; xx < outputWidth; xx++)
        {
          /* Top-left corner of the input window for output (yy, xx) */
          real *ptr_input = input_data + p*nInputPlane*inputWidth*inputHeight + k*inputWidth*inputHeight + yy*dH*inputWidth+xx*dW;
          /* Sum (not mean) of the kH x kW window */
          real sum = 0;
          long kx, ky;

          for(ky = 0; ky < kH; ky++)
          {
            for(kx = 0; kx < kW; kx++)
              sum += ptr_input[kx];
            ptr_input += inputWidth; /* next input line */
          }
          /* Add the weighted window sum on top of the bias */
          *ptr_output++ += the_weight*sum;
        }
      }
    }
  }
  THTensor_(free)(input);
}
/*
 * Backward pass with respect to the input.
 *
 * gradInput is resized like input, zeroed plane by plane, and then every
 * gradOutput value, multiplied by weight[k], is added to each element of
 * the kH x kW input window that the corresponding output was computed from.
 *
 * state      : unused by this implementation.
 * input      : 3D or 4D tensor; only its sizes are read here.
 * gradOutput : gradient w.r.t. the output; a contiguous version is used.
 * gradInput  : resized to input's shape and overwritten.
 * weight     : contiguous, one scalar per plane.
 * kW, kH     : kernel width / height.
 * dW, dH     : horizontal / vertical stride, must be positive.
 */
void THNN_(SpatialSubSampling_updateGradInput)(
           THNNState *state,
           THTensor *input,
           THTensor *gradOutput,
           THTensor *gradInput,
           THTensor *weight,
           int kW, int kH,
           int dW, int dH)
{
  THNN_(SpatialSubSampling_shapeCheck)(input, gradOutput, weight, kW, kH);
  /* The strides are used as divisors below; reject zero/negative values. */
  THArgCheck(dW > 0 && dH > 0, 8, "stride should be greater than zero");

  int dimw = 2;
  int dimh = 1;
  long nbatch = 1;

  long inputWidth;
  long inputHeight;
  long outputWidth;
  long outputHeight;

  int nInputPlane = THTensor_(size)(weight,0);

  real *weight_data;
  real *gradOutput_data;
  real *gradInput_data;

  long k;

  if (input->nDimension == 4) {
    nbatch = input->size[0];
    dimw++;
    dimh++;
  }

  inputWidth = input->size[dimw];
  inputHeight = input->size[dimh];
  outputWidth = (inputWidth - kW) / dW + 1;
  outputHeight = (inputHeight - kH) / dH + 1;

  weight_data = THTensor_(data)(weight);
  /* newContiguous returns a tensor that is released at the end of this function. */
  gradOutput = THTensor_(newContiguous)(gradOutput);
  gradOutput_data = THTensor_(data)(gradOutput);

  /* NOTE(review): gradInput is indexed as a dense array below; this assumes
     resizeAs leaves it contiguous -- confirm against THTensor resize. */
  THTensor_(resizeAs)(gradInput, input);
  gradInput_data = THTensor_(data)(gradInput);

#pragma omp parallel for private(k)
  for(k = 0; k < nInputPlane; k++)
  {
    long p;
    for(p = 0; p < nbatch; p++)
    {
      real the_weight = weight_data[k];
      /* Start of gradOutput plane k of sample p */
      real *ptr_gradOutput = gradOutput_data + p*nInputPlane*outputHeight*outputWidth + k*outputWidth*outputHeight;
      long xx, yy;

      /* Clear this gradInput plane before accumulating into it */
      real* ptr_gi = gradInput_data + p*nInputPlane*inputWidth*inputHeight + k*inputWidth*inputHeight;
      long i;
      for(i=0; i<inputWidth*inputHeight; i++)
        ptr_gi[i] = 0.0;

      for(yy = 0; yy < outputHeight; yy++)
      {
        for(xx = 0; xx < outputWidth; xx++)
        {
          /* Top-left corner of the window that produced output (yy, xx) */
          real *ptr_gradInput = gradInput_data + p*nInputPlane*inputWidth*inputHeight + k*inputWidth*inputHeight + yy*dH*inputWidth+xx*dW;
          real z = *ptr_gradOutput++ * the_weight;
          long kx, ky;

          /* Windows may overlap, hence += rather than = */
          for(ky = 0; ky < kH; ky++)
          {
            for(kx = 0; kx < kW; kx++)
              ptr_gradInput[kx] += z;
            ptr_gradInput += inputWidth; /* next gradInput line */
          }
        }
      }
    }
  }
  THTensor_(free)(gradOutput);
}
/*
 * Accumulates the parameter gradients.
 *
 * For every plane k and every sample:
 *   gradBias[k]   += scale * (sum of gradOutput over the plane)
 *   gradWeight[k] += scale * (sum over output locations of
 *                             gradOutput * sum of the kH x kW input window)
 *
 * state      : unused by this implementation.
 * input      : 3D or 4D tensor; a contiguous version is used.
 * gradOutput : gradient w.r.t. the output; a contiguous version is used.
 * gradWeight : contiguous, one scalar per plane; accumulated into.
 * gradBias   : contiguous, one scalar per plane; accumulated into.
 *              A NULL gradBias skips the bias accumulation.
 * kW, kH     : kernel width / height.
 * dW, dH     : horizontal / vertical stride, must be positive.
 * scale_     : factor applied to both accumulated gradients.
 */
void THNN_(SpatialSubSampling_accGradParameters)(
           THNNState *state,
           THTensor *input,
           THTensor *gradOutput,
           THTensor *gradWeight,
           THTensor *gradBias,
           int kW, int kH,
           int dW, int dH,
           accreal scale_)
{
  real scale = TH_CONVERT_ACCREAL_TO_REAL(scale_);
  THNN_(SpatialSubSampling_shapeCheck)(input, gradOutput, gradWeight, kW, kH);
  /* gradBias is indexed as a dense array below, like bias in updateOutput. */
  THArgCheck(!gradBias || THTensor_(isContiguous)(gradBias), 5, "gradBias must be contiguous");
  /* The strides are used as divisors below; reject zero/negative values. */
  THArgCheck(dW > 0 && dH > 0, 8, "stride should be greater than zero");

  long nbatch = 1;
  int dimw = 2;
  int dimh = 1;

  long inputWidth;
  long inputHeight;
  long outputWidth;
  long outputHeight;

  int nInputPlane = THTensor_(size)(gradWeight,0);

  real *gradWeight_data;
  real *gradBias_data;
  real *gradOutput_data;
  real *input_data;

  long k;

  if (input->nDimension == 4) {
    dimw++;
    dimh++;
    nbatch = input->size[0];
  }

  inputWidth = input->size[dimw];
  inputHeight = input->size[dimh];
  outputWidth = (inputWidth - kW) / dW + 1;
  outputHeight = (inputHeight - kH) / dH + 1;

  gradWeight_data = THTensor_(data)(gradWeight);
  gradBias_data = gradBias ? THTensor_(data)(gradBias) : NULL;
  /* Both newContiguous results are released at the end of this function. */
  gradOutput = THTensor_(newContiguous)(gradOutput);
  gradOutput_data = THTensor_(data)(gradOutput);

  input = THTensor_(newContiguous)(input);
  input_data = THTensor_(data)(input);

#pragma omp parallel for private(k)
  for(k = 0; k < nInputPlane; k++)
  {
    long p;
    for(p = 0; p < nbatch; p++)
    {
      /* Start of gradOutput plane k of sample p */
      real *ptr_gradOutput = gradOutput_data + p*nInputPlane*outputHeight*outputWidth + k*outputWidth*outputHeight;
      real sum;
      long xx, yy;
      long i;

      /* Bias gradient: plain sum of gradOutput over the plane */
      if (gradBias_data)
      {
        sum = 0;
        for(i = 0; i < outputWidth*outputHeight; i++)
          sum += ptr_gradOutput[i];
        gradBias_data[k] += scale*sum;
      }

      /* Weight gradient: gradOutput times the matching input window sum */
      sum = 0;
      for(yy = 0; yy < outputHeight; yy++)
      {
        for(xx = 0; xx < outputWidth; xx++)
        {
          /* Top-left corner of the input window for output (yy, xx) */
          real *ptr_input = input_data + p*nInputPlane*inputWidth*inputHeight + k*inputWidth*inputHeight + yy*dH*inputWidth+xx*dW;
          real z = *ptr_gradOutput++;
          long kx, ky;

          for(ky = 0; ky < kH; ky++)
          {
            for(kx = 0; kx < kW; kx++)
              sum += z * ptr_input[kx];
            ptr_input += inputWidth; /* next input line */
          }
        }
      }
      gradWeight_data[k] += scale*sum;
    }
  }

  THTensor_(free)(input);
  THTensor_(free)(gradOutput);
}
#endif