| #ifndef TH_GENERIC_FILE |
| #define TH_GENERIC_FILE "generic/SpatialConvolutionMM.c" |
| #else |
| |
| #include <ATen/div_rtn.h> |
| |
| static inline void THNN_(SpatialConvolutionMM_shapeCheck)( |
| THTensor *input, THTensor *gradOutput, |
| THTensor *weight, THTensor *bias, |
| int kH, int kW, int dH, int dW, int padH, int padW, int weight_nullable) { |
| |
| THArgCheck(kW > 0 && kH > 0, 9, |
| "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW); |
| THArgCheck(dW > 0 && dH > 0, 11, |
| "stride should be greater than zero, but got dH: %d dW: %d", dH, dW); |
| |
| if (weight != NULL) { |
| THNN_ARGCHECK(!weight->is_empty() && (weight->dim() == 2 || weight->dim() == 4), 5, weight, |
| "non-empty 2D or 4D weight tensor expected, but got: %s"); |
| if (bias != NULL) { |
| THNN_CHECK_DIM_SIZE(bias, 1, 0, weight->size(0)); |
| } |
| } else if (!weight_nullable) { |
| THError("weight tensor is expected to be non-nullable"); |
| } |
| |
| int ndim = input->dim(); |
| int dimf = 0; |
| int dimh = 1; |
| int dimw = 2; |
| |
| if (ndim == 4) { |
| dimf++; |
| dimh++; |
| dimw++; |
| } |
| |
| THNN_ARGCHECK(!input->is_empty() && (ndim == 3 || ndim == 4), 2, input, |
| "non-empty 3D or 4D input tensor expected but got: %s"); |
| |
| int64_t inputHeight = input->size(dimh); |
| int64_t inputWidth = input->size(dimw); |
| |
| int64_t exactInputHeight = inputHeight + 2 * padH; |
| int64_t exactInputWidth = inputWidth + 2 * padW; |
| |
| if (exactInputHeight < kH || exactInputWidth < kW) { |
| THError("Calculated padded input size per channel: (%ld x %ld). " |
| "Kernel size: (%ld x %ld). Kernel size can't be greater than actual input size", |
| exactInputHeight, exactInputWidth, kH, kW); |
| } |
| |
| int64_t outputHeight = div_rtn<int64_t>(exactInputHeight - kH, dH) + 1; |
| int64_t outputWidth = div_rtn<int64_t>(exactInputWidth - kW, dW) + 1; |
| |
| if (outputWidth < 1 || outputHeight < 1) { |
| THError("Given input size per channel: (%ld x %ld). " |
| "Calculated output size per channel: (%ld x %ld). Output size is too small", |
| inputHeight, inputWidth, outputHeight, outputWidth); |
| } |
| |
| if (weight != NULL) { |
| int64_t nInputPlane = weight->size(1); |
| if (weight->dim() == 2) { |
| nInputPlane /= (kH * kW); |
| } |
| THNN_CHECK_DIM_SIZE(input, ndim, dimf, nInputPlane); |
| } |
| |
| if (gradOutput != NULL) { |
| if (weight != NULL) { |
| int64_t nOutputPlane = weight->size(0); |
| THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimf, nOutputPlane); |
| } else if (bias != NULL) { |
| int64_t nOutputPlane = bias->size(0); |
| THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimf, nOutputPlane); |
| } |
| THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimh, outputHeight); |
| THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimw, outputWidth); |
| } |
| } |
| |
| static THTensor* THNN_(newViewWeightMM2d)(THTensor *weight) { |
| weight = THTensor_(newContiguous)(weight); |
| if (weight->dim() == 4) { |
| int64_t s1 = weight->size(0); |
| int64_t s2 = weight->size(1) * weight->size(2) * weight->size(3); |
| THTensor *old_weight = weight; |
| weight = THTensor_(newWithStorage2d)(THTensor_getStoragePtr(weight), weight->storage_offset(), |
| s1, -1, s2, -1); |
| THTensor_(free)(old_weight); |
| } |
| return weight; |
| } |
| |
| static void THNN_(SpatialConvolutionMM_updateOutput_frame)( |
| THTensor *input, |
| THTensor *output, |
| THTensor *weight, |
| THTensor *bias, |
| THTensor *finput, |
| int kW, |
| int kH, |
| int dW, |
| int dH, |
| int padW, |
| int padH, |
| int64_t nInputPlane, |
| int64_t inputWidth, |
| int64_t inputHeight, |
| int64_t nOutputPlane, |
| int64_t outputWidth, |
| int64_t outputHeight) |
| { |
| int64_t i; |
| THTensor *output2d; |
| |
| THNN_(unfolded_copy)(finput, input, kW, kH, dW, dH, padW, padH, |
| nInputPlane, inputWidth, inputHeight, |
| outputWidth, outputHeight); |
| |
| output2d = THTensor_(newWithStorage2d)(THTensor_getStoragePtr(output), output->storage_offset(), |
| nOutputPlane, -1, |
| outputHeight*outputWidth, -1); |
| if (bias) { |
| for(i = 0; i < nOutputPlane; i++) |
| THVector_(fill) |
| (THStorage_(data)(THTensor_getStoragePtr(output)) + output->storage_offset() + output->stride(0) * i, |
| THTensor_(get1d)(bias, i), outputHeight*outputWidth); |
| } else { |
| THTensor_(zero)(output); |
| } |
| |
| THTensor_(addmm)(output2d, 1, output2d, 1, weight, finput); |
| |
| THTensor_(free)(output2d); |
| } |
| |
| void THNN_(SpatialConvolutionMM_updateOutput)( |
| THNNState *state, |
| THTensor *input, |
| THTensor *output, |
| THTensor *weight, |
| THTensor *bias, |
| THTensor *finput, |
| THTensor *fgradInput, |
| int kW, |
| int kH, |
| int dW, |
| int dH, |
| int padW, |
| int padH) |
| { |
| weight = THNN_(newViewWeightMM2d)(weight); |
| |
| THNN_(SpatialConvolutionMM_shapeCheck) |
| (input, NULL, weight, bias, kH, kW, dH, dW, padH, padW, 0); |
| |
| input = THTensor_(newContiguous)(input); |
| int ndim = input->dim(); |
| int dimf = 0; |
| int dimh = 1; |
| int dimw = 2; |
| |
| if (ndim == 4) { |
| dimf++; |
| dimh++; |
| dimw++; |
| } |
| |
| int64_t nInputPlane = input->size(dimf); |
| int64_t inputHeight = input->size(dimh); |
| int64_t inputWidth = input->size(dimw); |
| int64_t nOutputPlane = weight->size(0); |
| int64_t outputHeight = (inputHeight + 2*padH - kH) / dH + 1; |
| int64_t outputWidth = (inputWidth + 2*padW - kW) / dW + 1; |
| |
| if(input->dim() == 3) |
| { |
| THTensor_(resize2d)(finput, kW*kH*nInputPlane, outputHeight*outputWidth); |
| THTensor_(resize3d)(output, nOutputPlane, outputHeight, outputWidth); |
| |
| THNN_(SpatialConvolutionMM_updateOutput_frame) |
| (input, output, weight, bias, finput, |
| kW, kH, dW, dH, padW, padH, |
| nInputPlane, inputWidth, inputHeight, |
| nOutputPlane, outputWidth, outputHeight); |
| } |
| else |
| { |
| int64_t T = input->size(0); |
| int64_t t; |
| |
| THTensor_(resize3d)(finput, T, kW*kH*nInputPlane, outputHeight*outputWidth); |
| THTensor_(resize4d)(output, T, nOutputPlane, outputHeight, outputWidth); |
| |
| #pragma omp parallel for private(t) |
| for(t = 0; t < T; t++) |
| { |
| THTensor *input_t = THTensor_(newSelect)(input, 0, t); |
| THTensor *output_t = THTensor_(newSelect)(output, 0, t); |
| THTensor *finput_t = THTensor_(newSelect)(finput, 0, t); |
| |
| THNN_(SpatialConvolutionMM_updateOutput_frame) |
| (input_t, output_t, weight, bias, finput_t, |
| kW, kH, dW, dH, padW, padH, |
| nInputPlane, inputWidth, inputHeight, |
| nOutputPlane, outputWidth, outputHeight); |
| |
| THTensor_(free)(input_t); |
| THTensor_(free)(output_t); |
| THTensor_(free)(finput_t); |
| } |
| } |
| |
| THTensor_(free)(input); |
| THTensor_(free)(weight); |
| } |
| |
| static void THNN_(SpatialConvolutionMM_updateGradInput_frame)( |
| THTensor *gradInput, |
| THTensor *gradOutput, |
| THTensor *weight, |
| THTensor *fgradInput, |
| int kW, |
| int kH, |
| int dW, |
| int dH, |
| int padW, |
| int padH) |
| { |
| THTensor *gradOutput2d = THTensor_(newWithStorage2d) |
| (THTensor_getStoragePtr(gradOutput), gradOutput->storage_offset(), |
| gradOutput->size(0), -1, |
| gradOutput->size(1)*gradOutput->size(2), -1); |
| THTensor_(addmm)(fgradInput, 0, fgradInput, 1, weight, gradOutput2d); |
| THTensor_(free)(gradOutput2d); |
| |
| THTensor_(zero)(gradInput); |
| |
| THNN_(unfolded_acc)(fgradInput, gradInput, kW, kH, dW, dH, |
| padW, padH, |
| gradInput->size(0), gradInput->size(2), gradInput->size(1), |
| gradOutput->size(2), gradOutput->size(1)); |
| } |
| |
| void THNN_(SpatialConvolutionMM_updateGradInput)( |
| THNNState *state, |
| THTensor *input, |
| THTensor *gradOutput, |
| THTensor *gradInput, |
| THTensor *weight, |
| THTensor *finput, |
| THTensor *fgradInput, |
| int kW, |
| int kH, |
| int dW, |
| int dH, |
| int padW, |
| int padH) |
| { |
| weight = THNN_(newViewWeightMM2d)(weight); |
| |
| THNN_(SpatialConvolutionMM_shapeCheck) |
| (input, gradOutput, weight, NULL, kH, kW, dH, dW, padH, padW, 0); |
| |
| input = THTensor_(newContiguous)(input); |
| gradOutput = THTensor_(newContiguous)(gradOutput); |
| |
| THTensor_(resizeAs)(gradInput, input); |
| THTensor_(resizeAs)(fgradInput, finput); |
| |
| // depending on the BLAS library, fgradInput (result tensor) might |
| // be left uninitialized on zero alpha, which might lead to weird behavior |
| // hence, to be safe, zero it |
| THTensor_(zero)(fgradInput); |
| THTensor *tweight = THTensor_(new)(); |
| THTensor_(transpose)(tweight, weight, 0, 1); |
| |
| if(input->dim() == 3) |
| { |
| THNN_(SpatialConvolutionMM_updateGradInput_frame)(gradInput, gradOutput, |
| tweight, fgradInput, |
| kW, kH, dW, dH, padW, padH); |
| } |
| else |
| { |
| int64_t T = input->size(0); |
| int64_t t; |
| |
| #pragma omp parallel for private(t) |
| for(t = 0; t < T; t++) |
| { |
| THTensor *gradInput_t = THTensor_(newSelect)(gradInput, 0, t); |
| THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t); |
| THTensor *fgradInput_t = THTensor_(newSelect)(fgradInput, 0, t); |
| |
| THNN_(SpatialConvolutionMM_updateGradInput_frame)(gradInput_t, gradOutput_t, |
| tweight, fgradInput_t, |
| kW, kH, dW, dH, padW, padH); |
| |
| THTensor_(free)(gradInput_t); |
| THTensor_(free)(gradOutput_t); |
| THTensor_(free)(fgradInput_t); |
| } |
| } |
| |
| THTensor_(free)(tweight); |
| THTensor_(free)(input); |
| THTensor_(free)(gradOutput); |
| THTensor_(free)(weight); |
| } |
| |
| static void THNN_(SpatialConvolutionMM_accGradParameters_frame)( |
| THTensor *gradOutput, |
| THTensor *gradWeight, |
| THTensor *gradBias, |
| THTensor *finput, |
| real scale) |
| { |
| int64_t i; |
| THTensor *gradOutput2d = THTensor_(newWithStorage2d) |
| (THTensor_getStoragePtr(gradOutput), gradOutput->storage_offset(), |
| gradOutput->size(0), -1, |
| gradOutput->size(1)*gradOutput->size(2), -1); |
| |
| if (gradWeight) { |
| THTensor *tfinput = THTensor_(new)(); |
| THTensor_(transpose)(tfinput, finput, 0, 1); |
| THTensor_(addmm)(gradWeight, 1, gradWeight, scale, gradOutput2d, tfinput); |
| THTensor_(free)(tfinput); |
| } |
| |
| if (gradBias) { |
| for(i = 0; i < gradBias->size(0); i++) |
| { |
| int64_t k; |
| real sum = 0; |
| real *data = THStorage_(data)(THTensor_getStoragePtr(gradOutput2d)) + gradOutput2d->storage_offset() + i*gradOutput2d->stride(0); |
| for(k = 0; k < gradOutput2d->size(1); k++) |
| sum += data[k]; |
| (THStorage_(data)(THTensor_getStoragePtr(gradBias)) + gradBias->storage_offset())[i] += scale*sum; |
| } |
| } |
| |
| THTensor_(free)(gradOutput2d); |
| } |
| |
| void THNN_(SpatialConvolutionMM_accGradParameters)( |
| THNNState *state, |
| THTensor *input, |
| THTensor *gradOutput, |
| THTensor *gradWeight, |
| THTensor *gradBias, |
| THTensor *finput, // can be NULL if gradWeight = NULL |
| THTensor *fgradInput, |
| int kW, |
| int kH, |
| int dW, |
| int dH, |
| int padW, |
| int padH, |
| accreal scale_) |
| { |
| real scale = TH_CONVERT_ACCREAL_TO_REAL(scale_); |
| if (gradWeight) { |
| THArgCheck(THTensor_(isContiguous)(gradWeight), 4, "gradWeight needs to be contiguous"); |
| gradWeight = THNN_(newViewWeightMM2d)(gradWeight); |
| } |
| if (gradBias) { |
| THArgCheck(THTensor_(isContiguous)(gradBias), 5, "gradBias needs to be contiguous"); |
| } |
| |
| THNN_(SpatialConvolutionMM_shapeCheck) |
| (input, gradOutput, gradWeight, gradBias, kH, kW, dH, dW, padH, padW, 1); |
| |
| input = THTensor_(newContiguous)(input); |
| gradOutput = THTensor_(newContiguous)(gradOutput); |
| |
| if(input->dim() == 3) |
| { |
| THNN_(SpatialConvolutionMM_accGradParameters_frame)(gradOutput, gradWeight, |
| gradBias, finput, scale); |
| } |
| else |
| { |
| int64_t T = input->size(0); |
| int64_t t; |
| |
| for(t = 0; t < T; t++) |
| { |
| THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t); |
| THTensor *finput_t = NULL; |
| if (gradWeight) { |
| finput_t = THTensor_(newSelect)(finput, 0, t); |
| } |
| |
| THNN_(SpatialConvolutionMM_accGradParameters_frame)(gradOutput_t, gradWeight, |
| gradBias, finput_t, scale); |
| |
| THTensor_(free)(gradOutput_t); |
| if (gradWeight) { |
| THTensor_(free)(finput_t); |
| } |
| } |
| } |
| |
| THTensor_(free)(input); |
| THTensor_(free)(gradOutput); |
| if (gradWeight) { |
| THTensor_(free)(gradWeight); |
| } |
| } |
| |
| #endif |