| #ifndef TH_GENERIC_FILE |
| #define TH_GENERIC_FILE "generic/VolumetricConvolutionMM.c" |
| #else |
| |
| /* note: due to write issues, this one cannot be parallelized as well as unfolded_copy */ |
| static void THNN_(unfolded_acc_vol)( |
| THTensor *finput, THTensor *input, |
| int kT, int kW, int kH, |
| int dT, int dW, int dH, |
| int pT, int pW, int pH, |
| int nInputPlane, |
| int inputDepth, int inputWidth, int inputHeight, |
| int outputDepth, int outputWidth, int outputHeight) |
| { |
| int nip; |
| real *input_data = THTensor_(data)(input); |
| real *finput_data = THTensor_(data)(finput); |
| |
| //#pragma omp parallel for private(nip) |
| for (nip = 0; nip < nInputPlane; nip++) |
| { |
| int kt, kw, kh, t, y, x, it, ix, iy; |
| for (kt = 0; kt < kT; kt++) |
| { |
| for (kh = 0; kh < kH; kh++) |
| { |
| for (kw = 0; kw < kW; kw++) |
| { |
| real *src = finput_data |
| + nip * (kT*kH*kW*outputDepth*outputHeight*outputWidth) |
| + kt * (kH*kW*outputDepth*outputHeight*outputWidth) |
| + kh * (kW*outputDepth*outputHeight*outputWidth) |
| + kw * (outputDepth*outputHeight*outputWidth); |
| |
| real *dst = input_data + nip*(inputDepth*inputHeight*inputWidth); |
| if (pT > 0 || pH > 0 || pW > 0) |
| { |
| for (t = 0; t < outputDepth; t++) |
| { |
| it = t*dT - pT + kt; |
| for (y = 0; y < outputHeight; y++) |
| { |
| iy = y*dH - pH + kh; |
| for (x = 0; x < outputWidth; x++) |
| { |
| ix = x*dW - pW + kw; |
| if (it < 0 || it >= inputDepth || iy < 0 || iy >= inputHeight || ix < 0 || ix >= inputWidth) |
| { |
| } |
| else |
| { |
| THVector_(add)(dst+it*inputHeight*inputWidth+iy*inputWidth+ix, src+t*outputHeight*outputWidth+y*outputWidth+x, 1, 1); |
| } |
| } |
| } |
| } |
| } |
| else |
| { |
| for (t = 0; t < outputDepth; t++) |
| { |
| it = t*dT + kt; |
| for (y = 0; y < outputHeight; y++) |
| { |
| iy = y*dH + kh; |
| for(x = 0; x < outputWidth; x++) |
| { |
| ix = x*dW + kw; |
| THVector_(add)(dst+it*inputHeight*inputWidth+iy*inputWidth+ix, src+t*outputHeight*outputWidth+y*outputWidth+x, 1, 1); |
| } |
| } |
| } |
| } |
| } |
| } |
| } |
| } |
| } |
| |
| static void THNN_(unfolded_copy_vol)( |
| THTensor *finput, THTensor *input, |
| int kT, int kW, int kH, |
| int dT, int dW, int dH, |
| int pT, int pW, int pH, |
| int nInputPlane, |
| int inputDepth, int inputWidth, int inputHeight, |
| int outputDepth, int outputWidth, int outputHeight) |
| { |
| long k; |
| real *input_data = THTensor_(data)(input); |
| real *finput_data = THTensor_(data)(finput); |
| // #pragma omp parallel for private(k) |
| for (k = 0; k < nInputPlane*kT*kH*kW; k++) |
| { |
| int nip = k / (kT*kH*kW); |
| int rest = k % (kT*kH*kW); |
| int kt = rest / (kH*kW); |
| rest = rest % (kH*kW); |
| int kh = rest / kW; |
| int kw = rest % kW; |
| int t,x,y,it,ix,iy; |
| real *dst = finput_data |
| + nip * (kT*kH*kW*outputDepth*outputHeight*outputWidth) |
| + kt * (kH*kW*outputDepth*outputHeight*outputWidth) |
| + kh * (kW*outputDepth*outputHeight*outputWidth) |
| + kw * (outputDepth*outputHeight*outputWidth); |
| real *src = input_data + nip*(inputDepth*inputHeight*inputWidth); |
| |
| if (pT > 0 || pH > 0 || pW > 0) |
| { |
| for (t = 0; t < outputDepth; t++) |
| { |
| it = t*dT - pT + kt; |
| for (y = 0; y < outputHeight; y++) |
| { |
| iy = y*dH - pH + kh; |
| for (x = 0; x < outputWidth; x++) |
| { |
| ix = x*dW - pW + kw; |
| if (it < 0 || it >= inputDepth || iy < 0 || iy >= inputHeight || ix < 0 || ix >= inputWidth) |
| memset(dst+t*outputHeight*outputWidth+y*outputWidth+x, 0, sizeof(real)*(1)); |
| else |
| memcpy(dst+t*outputHeight*outputWidth+y*outputWidth+x, src+it*inputHeight*inputWidth+iy*inputWidth+ix, sizeof(real)*(1)); |
| } |
| } |
| } |
| } |
| else |
| { |
| for (t = 0; t < outputDepth; t++) |
| { |
| it = t*dT + kt; |
| for (y = 0; y < outputHeight; y++) |
| { |
| iy = y*dH + kh; |
| for(x = 0; x < outputWidth; x++) |
| { |
| ix = x*dW + kw; |
| memcpy(dst+t*outputHeight*outputWidth+y*outputWidth+x, src+it*inputHeight*inputWidth+iy*inputWidth+ix, sizeof(real)*(1)); |
| } |
| } |
| } |
| } |
| } |
| } |
| |
| static void THNN_(VolumetricConvolutionMM_updateOutput_frame)( |
| THTensor *input, THTensor *output, THTensor *weight, THTensor *bias, THTensor *finput, |
| int kT, int kW, int kH, |
| int dT, int dW, int dH, |
| int pT,int pW, int pH, |
| long nInputPlane, long inputDepth, long inputWidth, long inputHeight, |
| long nOutputPlane, long outputDepth, long outputWidth, long outputHeight) |
| { |
| long i; |
| THTensor *output2d; |
| |
| THNN_(unfolded_copy_vol)( |
| finput, input, |
| kT, kW, kH, |
| dT, dW, dH, |
| pT, pW, pH, |
| nInputPlane, |
| inputDepth, inputWidth, inputHeight, |
| outputDepth, outputWidth, outputHeight |
| ); |
| |
| output2d = THTensor_(newWithStorage2d)( |
| output->storage, output->storageOffset, nOutputPlane, -1, |
| outputDepth*outputHeight*outputWidth, -1 |
| ); |
| |
| for (i = 0; i < nOutputPlane; i++) |
| { |
| THVector_(fill)( |
| output->storage->data+output->storageOffset+output->stride[0]*i, |
| THTensor_(get1d)(bias, i), |
| outputDepth*outputHeight*outputWidth |
| ); |
| } |
| |
| THTensor_(addmm)(output2d, 1, output2d, 1, weight, finput); |
| |
| THTensor_(free)(output2d); |
| } |
| |
| void THNN_(VolumetricConvolutionMM_updateOutput)( |
| THNNState *state, |
| THTensor *input, |
| THTensor *output, |
| THTensor *weight, |
| THTensor *bias, |
| THTensor *finput, |
| int kT, int kW, int kH, |
| int dT, int dW, int dH, |
| int pT, int pW, int pH) |
| { |
| int dimf = 0; |
| int dimt = 1; |
| int dimh = 2; |
| int dimw = 3; |
| |
| long nInputPlane; |
| long inputDepth; |
| long inputHeight; |
| long inputWidth; |
| long nOutputPlane; |
| long outputDepth; |
| long outputHeight; |
| long outputWidth; |
| |
| THArgCheck(input->nDimension == 4 || input->nDimension == 5, 2, |
| "4D or 5D(batch mode) tensor expected" |
| ); |
| |
| if (input->nDimension == 5) |
| { |
| dimf++; |
| dimt++; |
| dimh++; |
| dimw++; |
| } |
| |
| nInputPlane = input->size[dimf]; |
| inputDepth = input->size[dimt]; |
| inputHeight = input->size[dimh]; |
| inputWidth = input->size[dimw]; |
| nOutputPlane = weight->size[0]; |
| outputDepth = (inputDepth + 2*pT - kT) / dT + 1; |
| outputHeight = (inputHeight + 2*pH - kH) / dH + 1; |
| outputWidth = (inputWidth + 2*pW - kW) / dW + 1; |
| |
| if (outputWidth < 1 || outputHeight < 1) |
| { |
| THError( |
| "Given input size: (%dx%dx%dx%d). Calculated output size: (%dx%dx%dx%d). Output size is too small", |
| nInputPlane, inputDepth, inputHeight, inputWidth, |
| nOutputPlane, outputDepth, outputHeight, outputWidth |
| ); |
| } |
| |
| if (input->nDimension == 4) |
| { |
| THTensor_(resize2d)(finput, kT*kW*kH*nInputPlane, outputDepth*outputHeight*outputWidth); |
| THTensor_(resize4d)(output, nOutputPlane, outputDepth, outputHeight, outputWidth); |
| |
| THNN_(VolumetricConvolutionMM_updateOutput_frame)( |
| input, output, weight, bias, finput, |
| kT, kW, kH, |
| dT, dW, dH, |
| pT, pW, pH, |
| nInputPlane, inputDepth, inputWidth, inputHeight, |
| nOutputPlane, outputDepth, outputWidth, outputHeight |
| ); |
| } |
| else |
| { |
| long T = input->size[0]; |
| long t; |
| |
| THTensor_(resize3d)(finput, T, kT*kW*kH*nInputPlane, outputDepth*outputHeight*outputWidth); |
| THTensor_(resize5d)(output, T, nOutputPlane, outputDepth, outputHeight, outputWidth); |
| |
| // #pragma omp parallel for private(t) |
| for (t = 0; t < T; t++) |
| { |
| THTensor *input_t = THTensor_(newSelect)(input, 0, t); |
| THTensor *output_t = THTensor_(newSelect)(output, 0, t); |
| THTensor *finput_t = THTensor_(newSelect)(finput, 0, t); |
| |
| THNN_(VolumetricConvolutionMM_updateOutput_frame)( |
| input_t, output_t, weight, bias, finput_t, |
| kT, kW, kH, |
| dT, dW, dH, |
| pT, pW, pH, |
| nInputPlane, inputDepth, inputWidth, inputHeight, |
| nOutputPlane, outputDepth, outputWidth, outputHeight |
| ); |
| |
| THTensor_(free)(input_t); |
| THTensor_(free)(output_t); |
| THTensor_(free)(finput_t); |
| } |
| } |
| } |
| |
| static void THNN_(VolumetricConvolutionMM_updateGradInput_frame)( |
| THTensor *gradInput, THTensor *gradOutput, THTensor *weight, THTensor *fgradInput, |
| int kT, int kW, int kH, |
| int dT, int dW, int dH, |
| int pT, int pW, int pH) |
| { |
| THTensor *gradOutput2d = THTensor_(newWithStorage2d)( |
| gradOutput->storage, gradOutput->storageOffset, |
| gradOutput->size[0], -1, |
| gradOutput->size[1]*gradOutput->size[2]*gradOutput->size[3], -1 |
| ); |
| |
| THTensor_(addmm)(fgradInput, 0, fgradInput, 1, weight, gradOutput2d); |
| THTensor_(free)(gradOutput2d); |
| |
| THTensor_(zero)(gradInput); |
| |
| THNN_(unfolded_acc_vol)( |
| fgradInput, gradInput, |
| kT, kW, kH, |
| dT, dW, dH, |
| pT, pW, pH, |
| gradInput->size[0], gradInput->size[1], gradInput->size[3], gradInput->size[2], |
| gradOutput->size[1], gradOutput->size[3], gradOutput->size[2] |
| ); |
| } |
| |
| void THNN_(VolumetricConvolutionMM_updateGradInput)( |
| THNNState *state, |
| THTensor *input, |
| THTensor *gradOutput, |
| THTensor *gradInput, |
| THTensor *weight, |
| THTensor *finput, |
| THTensor *fgradInput, |
| int kT, int kW, int kH, |
| int dT, int dW, int dH, |
| int pT, int pW, int pH) |
| { |
| // number of input/output planes and kernel size is indirectly defined by the weight tensor |
| THArgCheck(weight->nDimension == 2, 4, |
| "2D weight tensor is expected (nOutputPlane x (nInputPlane * kT * kH * kW))" |
| ); |
| |
| int nOutputPlane = (int)weight->size[0]; |
| |
| THArgCheck(nOutputPlane == gradOutput->size[input->nDimension == 5 ? 1 : 0], 1, |
| "Number of output features is not equal to nOutputPlane" |
| ); |
| |
| THTensor_(resizeAs)(gradInput, input); |
| THTensor_(resizeAs)(fgradInput, finput); |
| THTensor_(transpose)(weight, weight, 0, 1); |
| |
| if (input->nDimension == 4) |
| { |
| THNN_(VolumetricConvolutionMM_updateGradInput_frame)( |
| gradInput, gradOutput, weight, fgradInput, |
| kT, kW, kH, |
| dT, dW, dH, |
| pT, pW, pH |
| ); |
| } |
| else |
| { |
| long T = input->size[0]; |
| long t; |
| |
| //#pragma omp parallel for private(t) |
| for (t = 0; t < T; t++) |
| { |
| THTensor *gradInput_t = THTensor_(newSelect)(gradInput, 0, t); |
| THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t); |
| THTensor *fgradInput_t = THTensor_(newSelect)(fgradInput, 0, t); |
| |
| THNN_(VolumetricConvolutionMM_updateGradInput_frame)( |
| gradInput_t, gradOutput_t, weight, fgradInput_t, |
| kT, kW, kH, |
| dT, dW, dH, |
| pT, pW, pH |
| ); |
| |
| THTensor_(free)(gradInput_t); |
| THTensor_(free)(gradOutput_t); |
| THTensor_(free)(fgradInput_t); |
| } |
| } |
| |
| THTensor_(transpose)(weight, weight, 0, 1); |
| } |
| |
| static void THNN_(VolumetricConvolutionMM_accGradParameters_frame)( |
| THTensor *gradOutput, THTensor *gradWeight, THTensor *gradBias, THTensor *finput, real scale) |
| { |
| long i; |
| THTensor *gradOutput2d = THTensor_(newWithStorage2d)( |
| gradOutput->storage, gradOutput->storageOffset, |
| gradOutput->size[0], -1, |
| gradOutput->size[1]*gradOutput->size[2]*gradOutput->size[3], -1 |
| ); |
| |
| THTensor_(transpose)(finput, finput, 0, 1); |
| THTensor_(addmm)(gradWeight, 1, gradWeight, scale, gradOutput2d, finput); |
| THTensor_(transpose)(finput, finput, 0, 1); |
| |
| for (i = 0; i < gradBias->size[0]; i++) |
| { |
| long k; |
| real sum = 0; |
| real *data = gradOutput2d->storage->data + gradOutput2d->storageOffset + i*gradOutput2d->stride[0]; |
| for (k = 0; k < gradOutput2d->size[1]; k++) |
| sum += data[k]; |
| |
| (gradBias->storage->data + gradBias->storageOffset)[i] += scale * sum; |
| } |
| |
| THTensor_(free)(gradOutput2d); |
| } |
| |
| void THNN_(VolumetricConvolutionMM_accGradParameters)( |
| THNNState *state, |
| THTensor *input, |
| THTensor *gradOutput, |
| THTensor *gradWeight, |
| THTensor *gradBias, |
| THTensor *finput, |
| real scale) |
| { |
| THArgCheck(gradWeight->nDimension == 2, 4, |
| "2D gradWeight tensor is expected (nOutputPlane x (nInputPlane * kT * kH * kW))" |
| ); |
| |
| int nOutputPlane = (int)gradWeight->size[0]; |
| |
| THArgCheck(gradBias->nDimension == 1 && gradBias->size[0] == nOutputPlane, 5, |
| "gradBias tensor has wrong size" |
| ); |
| |
| THArgCheck(nOutputPlane == gradOutput->size[input->nDimension == 5 ? 1 : 0], 3, |
| "Number of output features is not equal to nOutputPlane" |
| ); |
| |
| if (input->nDimension == 4) // non-batch mode |
| { |
| THNN_(VolumetricConvolutionMM_accGradParameters_frame)(gradOutput, gradWeight, gradBias, finput, scale); |
| } |
| else // batch mode |
| { |
| long T = input->size[0]; |
| long t; |
| |
| for (t = 0; t < T; t++) |
| { |
| THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t); |
| THTensor *finput_t = THTensor_(newSelect)(finput, 0, t); |
| |
| THNN_(VolumetricConvolutionMM_accGradParameters_frame)(gradOutput_t, gradWeight, gradBias, finput_t, scale); |
| |
| THTensor_(free)(gradOutput_t); |
| THTensor_(free)(finput_t); |
| } |
| } |
| } |
| |
| #endif |