| #ifndef TH_GENERIC_FILE |
| #define TH_GENERIC_FILE "generic/THTensorConv.c" |
| #else |
| |
| |
| /* |
| 2D Input, 2D kernel : convolve given image with the given kernel. |
| */ |
| void THTensor_(validXCorr2Dptr)(real *r_, |
| real alpha, |
| real *t_, long ir, long ic, |
| real *k_, long kr, long kc, |
| long sr, long sc) |
| { |
| long or = (ir - kr) / sr + 1; |
| long oc = (ic - kc) / sc + 1; |
| |
| long xx, yy, kx, ky; |
| |
| if ((sc != 1) || (oc < 4)) { |
| /* regular convolution */ |
| for(yy = 0; yy < or; yy++) { |
| for(xx = 0; xx < oc; xx++) { |
| /* Dot product in two dimensions... (between input image and the mask) */ |
| real *pi_ = t_ + yy*sr*ic + xx*sc; |
| real *pw_ = k_; |
| real sum = 0; |
| for(ky = 0; ky < kr; ky++) { |
| for(kx = 0; kx < kc; kx++) { |
| sum += pi_[kx]*pw_[kx]; |
| } |
| pi_ += ic; /* next input line */ |
| pw_ += kc; /* next mask line */ |
| } |
| /* Update output */ |
| *r_++ += alpha*sum; |
| } |
| } |
| |
| } else { |
| /* SSE-based convolution */ |
| for(yy = 0; yy < or; yy++) { |
| real *pi_ = t_ + yy*sr*ic; |
| real *pw_ = k_; |
| for (ky = 0; ky < kr; ky++) { |
| real *pis_ = pi_; |
| for (kx = 0; kx < kc; kx++) { |
| THVector_(add)(r_, pis_, alpha*pw_[kx], oc); |
| pis_++; |
| } |
| pi_ += ic; /* next input line */ |
| pw_ += kc; /* next mask line */ |
| } |
| r_ += oc; |
| } |
| } |
| } |
| |
| /* |
| 2D Input, 2D kernel : convolve given image with the given kernel. |
| */ |
| void THTensor_(validConv2Dptr)(real *r_, |
| real alpha, |
| real *t_, long ir, long ic, |
| real *k_, long kr, long kc, |
| long sr, long sc) |
| { |
| long or = (ir - kr) / sr + 1; |
| long oc = (ic - kc) / sc + 1; |
| |
| long xx, yy, kx, ky; |
| |
| if ((sc != 1) || (oc < 4)) { |
| /* regular convolution */ |
| for(yy = 0; yy < or; yy++) { |
| for(xx = 0; xx < oc; xx++) { |
| /* Dot product in two dimensions... (between input image and the mask) */ |
| real *pi_ = t_ + yy*sr*ic + xx*sc; |
| real *pw_ = k_ + kr*kc - 1; |
| real sum = 0; |
| for(ky = 0; ky < kr; ky++) { |
| for(kx = 0; kx < kc; kx++) { |
| sum += pi_[kx]*pw_[-kx]; |
| } |
| pi_ += ic; /* next input line */ |
| pw_ -= kc; /* next mask line */ |
| } |
| /* Update output */ |
| *r_++ += alpha*sum; |
| } |
| } |
| |
| } else { |
| /* SSE-based convolution */ |
| for(yy = 0; yy < or; yy++) { |
| real *pw_ = k_ + kr*kc - 1; |
| real *pi_ = t_ + yy*sr*ic; |
| for (ky = 0; ky < kr; ky++) { |
| real *pis_ = pi_; |
| for (kx = 0; kx < kc; kx++) { |
| THVector_(add)(r_, pis_, alpha*pw_[-kx], oc); |
| pis_++; |
| } |
| pi_ += ic; /* next input line */ |
| pw_ -= kc; /* next mask line */ |
| } |
| r_ += oc; |
| } |
| } |
| } |
| |
| /* |
| 2D Input, 2D kernel : convolve given image with the given kernel, full convolution. |
| */ |
| void THTensor_(fullConv2Dptr)(real *r_, |
| real alpha, |
| real *t_, long ir, long ic, |
| real *k_, long kr, long kc, |
| long sr, long sc) |
| { |
| long oc = (ic - 1) * sc + kc; |
| |
| long xx, yy, kx, ky; |
| |
| if ((sc != 1) || (ic < 4)) { |
| /* regular convolution */ |
| for(yy = 0; yy < ir; yy++) { |
| for(xx = 0; xx < ic; xx++) { |
| /* Outer product in two dimensions... (between input image and the mask) */ |
| real *po_ = r_ + yy*sr*oc + xx*sc; |
| real *pw_ = k_; |
| for(ky = 0; ky < kr; ky++) |
| { |
| real z = *t_ * alpha; |
| for(kx = 0; kx < kc; kx++) { |
| po_[kx] += z * pw_[kx]; |
| } |
| po_ += oc; /* next input line */ |
| pw_ += kc; /* next mask line */ |
| } |
| t_++; |
| } |
| } |
| |
| } else { |
| /* SSE-based convolution */ |
| for(yy = 0; yy < ir; yy++) { |
| real *po_ = r_ + yy*sr*oc; |
| real *pw_ = k_; |
| for (ky = 0; ky < kr; ky++) { |
| real *pos_ = po_; |
| for (kx = 0; kx < kc; kx++) { |
| THVector_(add)(pos_, t_, alpha*pw_[kx], ic); |
| pos_++; |
| } |
| po_ += oc; /* next input line */ |
| pw_ += kc; /* next mask line */ |
| } |
| t_ += ic; |
| } |
| } |
| } |
| |
| /* |
| 2D Input, 2D kernel : convolve given image with the given kernel, full convolution. |
| */ |
| void THTensor_(fullXCorr2Dptr)(real *r_, |
| real alpha, |
| real *t_, long ir, long ic, |
| real *k_, long kr, long kc, |
| long sr, long sc) |
| { |
| long oc = (ic - 1) * sc + kc; |
| |
| long xx, yy, kx, ky; |
| |
| if ((sc != 1) || (ic < 4)) { |
| /* regular convolution */ |
| for(yy = 0; yy < ir; yy++) { |
| for(xx = 0; xx < ic; xx++) { |
| /* Outer product in two dimensions... (between input image and the mask) */ |
| real *po_ = r_ + yy*sr*oc + xx*sc; |
| real *pw_ = k_ + kr*kc -1; |
| long kx, ky; |
| for(ky = 0; ky < kr; ky++) |
| { |
| real z = *t_ * alpha; |
| for(kx = 0; kx < kc; kx++) { |
| po_[kx] += z * pw_[-kx]; |
| } |
| po_ += oc; /* next input line */ |
| pw_ -= kc; /* next mask line */ |
| } |
| t_++; |
| } |
| } |
| |
| } else { |
| /* SSE-based convolution */ |
| for(yy = 0; yy < ir; yy++) { |
| real *po_ = r_ + yy*sr*oc; |
| real *pw_ = k_ + kr*kc -1; |
| for (ky = 0; ky < kr; ky++) { |
| real *pos_ = po_; |
| for (kx = 0; kx < kc; kx++) { |
| THVector_(add)(pos_, t_, pw_[-kx]*alpha, ic); |
| pos_++; |
| } |
| po_ += oc; /* next input line */ |
| pw_ -= kc; /* next mask line */ |
| } |
| t_ += ic; |
| } |
| } |
| } |
| |
| /* |
| 2D Input, 2D kernel : convolve given image with the given kernel, valid convolution. |
| for sr,sc=1 this is equivalent to validXCorr2Dptr, but otherwise it is useful for |
| calculating derivatives wrt a kernel that is applied with stride sr,sc != 1 |
| */ |
| void THTensor_(validXCorr2DRevptr)(real *r_, |
| real alpha, |
| real *t_, long ir, long ic, |
| real *k_, long kr, long kc, |
| long sr, long sc) |
| { |
| long or = ir - (kr - 1) * sr; |
| long oc = ic - (kc - 1) * sc; |
| |
| long xx, yy, kx, ky; |
| |
| if ((sc != 1) || (kc < 4)) { |
| /* regular convolution */ |
| for(yy = 0; yy < kr; yy++) { |
| for(xx = 0; xx < kc; xx++) { |
| real *po_ = r_; |
| real *pi_ = t_ + yy*sr*ic + xx*sc; |
| real z = *k_++ * alpha; |
| |
| for(ky = 0; ky < or; ky++) { |
| for(kx = 0; kx < oc; kx++) |
| po_[kx] += z * pi_[kx]; |
| pi_ += ic; |
| po_ += oc; |
| } |
| } |
| } |
| |
| } else { |
| /* SSE-based convolution */ |
| for(yy = 0; yy < kr; yy++) { |
| for(xx = 0; xx < kc; xx++) { |
| real *po_ = r_; |
| real *pi_ = t_ + yy*sr*ic + xx*sc; |
| real z = *k_++ * alpha; |
| |
| for(ky = 0; ky < or; ky++) { |
| THVector_(add)(po_, pi_, z, oc); |
| pi_ += ic; |
| po_ += oc; |
| } |
| } |
| } |
| } |
| } |
| /* |
| 3D Input, 3D kernel : convolve given volume with the given kernel. |
| */ |
| void THTensor_(validXCorr3Dptr)(real *r_, |
| real alpha, |
| real *t_, long it, long ir, long ic, |
| real *k_, long kt, long kr, long kc, |
| long st, long sr, long sc) |
| { |
| long ot = (it - kt) / st + 1; |
| long or = (ir - kr) / sr + 1; |
| long oc = (ic - kc) / sc + 1; |
| |
| long zz, xx, yy; |
| |
| for (zz = 0; zz < ot; zz++) |
| { |
| for(yy = 0; yy < or; yy++) |
| { |
| for(xx = 0; xx < oc; xx++) |
| { |
| /* Dot product in two dimensions... (between input image and the mask) */ |
| real *pi_ = t_ + zz*st*ir*ic + yy*sr*ic + xx*sc; |
| real *pw_ = k_; |
| real sum = 0; |
| long kz, kx, ky; |
| for(kz = 0; kz < kt; kz++) |
| { |
| for(ky = 0; ky < kr; ky++) |
| { |
| for(kx = 0; kx < kc; kx++) { |
| sum += pi_[kx]*pw_[kx]; |
| } |
| pi_ += ic; /* next input line */ |
| pw_ += kc; /* next mask line */ |
| } |
| pi_ += (ir-kr)*ic; /* next input slice */ |
| } |
| /* Update output */ |
| *r_++ += sum*alpha; |
| } |
| } |
| } |
| } |
| |
| /* |
| 3D Input, 3D kernel : convolve given volume with the given kernel. |
| */ |
| void THTensor_(validConv3Dptr)(real *r_, |
| real alpha, |
| real *t_, long it, long ir, long ic, |
| real *k_, long kt, long kr, long kc, |
| long st, long sr, long sc) |
| { |
| long ot = (it - kt) / st + 1; |
| long or = (ir - kr) / sr + 1; |
| long oc = (ic - kc) / sc + 1; |
| |
| long zz, xx, yy; |
| |
| for(zz = 0; zz < ot; zz++) |
| { |
| for(yy = 0; yy < or; yy++) |
| { |
| for(xx = 0; xx < oc; xx++) |
| { |
| /* Dot product in two dimensions... (between input image and the mask) */ |
| real *pi_ = t_ + zz*st*ir*ic + yy*sr*ic + xx*sc; |
| real *pw_ = k_ + kt*kr*kc - 1; |
| real sum = 0; |
| long kz, kx, ky; |
| for(kz = 0; kz < kt; kz++) |
| { |
| for(ky = 0; ky < kr; ky++) |
| { |
| for(kx = 0; kx < kc; kx++) { |
| sum += pi_[kx]*pw_[-kx]; |
| } |
| pi_ += ic; /* next input line */ |
| pw_ -= kc; /* next mask line */ |
| } |
| pi_ += (ir-kr)*ic; /* next input slice */ |
| } |
| /* Update output */ |
| *r_++ += alpha*sum; |
| } |
| } |
| } |
| } |
| |
| |
| /* |
| 3D Input, 3D kernel : convolve given volume with the given kernel, full convolution. |
| */ |
| void THTensor_(fullConv3Dptr)(real *r_, |
| real alpha, |
| real *t_, long it, long ir, long ic, |
| real *k_, long kt, long kr, long kc, |
| long st, long sr, long sc) |
| { |
| long or = (ir - 1) * sr + kr; |
| long oc = (ic - 1) * sc + kc; |
| |
| long zz, xx, yy; |
| |
| for(zz = 0; zz < it; zz++) |
| { |
| for(yy = 0; yy < ir; yy++) |
| { |
| for(xx = 0; xx < ic; xx++) |
| { |
| /* Outer product in two dimensions... (between input image and the mask) */ |
| real *po_ = r_ + zz*st*or*oc + yy*sr*oc + xx*sc; |
| real *pw_ = k_; |
| long kz, kx, ky; |
| /* printf("Output Plane : %ld,%ld,%ld, input val=%g\n",zz,yy,xx,*t_); */ |
| for(kz = 0; kz < kt; kz++) |
| { |
| for(ky = 0; ky < kr; ky++) |
| { |
| real z = *t_ * alpha; |
| for(kx = 0; kx < kc; kx++) { |
| /* printf("o=%g,k=%g," , po_[kx],pw_[kx]); */ |
| po_[kx] += z * pw_[kx]; |
| /* printf("o=%g " , po_[kx]); */ |
| } |
| /* printf("\n"); */ |
| po_ += oc; /* next input line */ |
| pw_ += kc; /* next mask line */ |
| } |
| po_ += (or-kr)*oc; /* next output slice */ |
| /* printf("\n"); */ |
| } |
| t_++; |
| } |
| } |
| } |
| } |
| |
| /* |
| 3D Input, 3D kernel : convolve given volume with the given kernel, full convolution. |
| */ |
| void THTensor_(fullXCorr3Dptr)(real *r_, |
| real alpha, |
| real *t_, long it, long ir, long ic, |
| real *k_, long kt, long kr, long kc, |
| long st, long sr, long sc) |
| { |
| long or = (ir - 1) * sr + kr; |
| long oc = (ic - 1) * sc + kc; |
| |
| long zz, xx, yy; |
| |
| for(zz = 0; zz < it; zz++) |
| { |
| for(yy = 0; yy < ir; yy++) |
| { |
| for(xx = 0; xx < ic; xx++) |
| { |
| /* Outer product in two dimensions... (between input image and the mask) */ |
| real *po_ = r_ + zz*st*or*oc + yy*sr*oc + xx*sc; |
| real *pw_ = k_ + kt*kr*kc -1; |
| long kz, kx, ky; |
| for(kz = 0; kz < kt; kz++) |
| { |
| for(ky = 0; ky < kr; ky++) |
| { |
| real z = *t_ * alpha; |
| for(kx = 0; kx < kc; kx++) { |
| po_[kx] += z * pw_[-kx]; |
| } |
| po_ += oc; /* next input line */ |
| pw_ -= kc; /* next mask line */ |
| } |
| po_ += (or-kr)*oc; /* next output slice */ |
| } |
| t_++; |
| } |
| } |
| } |
| } |
| |
| /* |
| 3D Input, 3D kernel : convolve given image with the given kernel, valid convolution. |
| for sr,sc=1 this is equivalent to validXCorr3Dptr, but otherwise it is useful for |
| calculating derivatives wrt a kernel that is applied with stride sr,sc != 1 |
| */ |
| void THTensor_(validXCorr3DRevptr)(real *r_, |
| real alpha, |
| real *t_, long it, long ir, long ic, |
| real *k_, long kt, long kr, long kc, |
| long st, long sr, long sc) |
| { |
| long ot = it - (kt - 1) * st; |
| long or = ir - (kr - 1) * sr; |
| long oc = ic - (kc - 1) * sc; |
| |
| long zz, xx, yy; |
| for(zz = 0; zz < kt; zz++) |
| { |
| for(yy = 0; yy < kr; yy++) |
| { |
| for(xx = 0; xx < kc; xx++) |
| { |
| real *po_ = r_; |
| real *pi_ = t_ + zz*st*ir*ic + yy*sr*ic + xx*sc; |
| real z = *k_++ * alpha; |
| long kz, kx, ky; |
| for(kz = 0; kz < ot; kz++) |
| { |
| for(ky = 0; ky < or; ky++) |
| { |
| for(kx = 0; kx < oc; kx++) |
| po_[kx] += z * pi_[kx]; |
| pi_ += ic; |
| po_ += oc; |
| } |
| pi_ += (ir-or)*ic; /* next input slice */ |
| } |
| } |
| } |
| } |
| } |
| |
| void THTensor_(conv2d)(real* output_data, |
| real alpha, |
| real* ptr_input, long nInputRows, long nInputCols, |
| real* ptr_weight, long nKernelRows, long nKernelCols, |
| long srow, long scol, |
| const char *vf, const char *xc) |
| { |
| THArgCheck(*vf == 'V' || *vf == 'F', 7, "type of convolution can be 'V' or 'F'"); |
| THArgCheck(*xc == 'C' || *xc == 'X', 7, "type of convolution can be 'X' or 'C'"); |
| if (*vf == 'F') |
| if (*xc == 'X') |
| THTensor_(fullXCorr2Dptr)(output_data, |
| alpha, |
| ptr_input, nInputRows, nInputCols, |
| ptr_weight, nKernelRows, nKernelCols, |
| srow, scol); |
| else |
| THTensor_(fullConv2Dptr)(output_data, |
| alpha, |
| ptr_input, nInputRows, nInputCols, |
| ptr_weight, nKernelRows, nKernelCols, |
| srow, scol); |
| else |
| if (*xc == 'X') |
| THTensor_(validXCorr2Dptr)(output_data, |
| alpha, |
| ptr_input, nInputRows, nInputCols, |
| ptr_weight, nKernelRows, nKernelCols, |
| srow, scol); |
| else |
| THTensor_(validConv2Dptr)(output_data, |
| alpha, |
| ptr_input, nInputRows, nInputCols, |
| ptr_weight, nKernelRows, nKernelCols, |
| srow, scol); |
| } |
| |
| void THTensor_(conv3d)(real* output_data, |
| real alpha, |
| real* ptr_input, long nInputDepth, long nInputRows, long nInputCols, |
| real* ptr_weight, long nKernelDepth, long nKernelRows, long nKernelCols, |
| long sdepth, long srow, long scol, |
| const char *vf, const char *xc) |
| { |
| THArgCheck(*vf == 'V' || *vf == 'F', 7, "type of convolution can be 'V' or 'F'"); |
| THArgCheck(*xc == 'C' || *xc == 'X', 7, "type of convolution can be 'X' or 'C'"); |
| if (*vf == 'F') |
| if (*xc == 'X') |
| THTensor_(fullXCorr3Dptr)(output_data, |
| alpha, |
| ptr_input, nInputDepth, nInputRows, nInputCols, |
| ptr_weight, nKernelDepth, nKernelRows, nKernelCols, |
| sdepth, srow, scol); |
| else |
| THTensor_(fullConv3Dptr)(output_data, |
| alpha, |
| ptr_input, nInputDepth, nInputRows, nInputCols, |
| ptr_weight, nKernelDepth, nKernelRows, nKernelCols, |
| sdepth, srow, scol); |
| else |
| if (*xc == 'X') |
| THTensor_(validXCorr3Dptr)(output_data, |
| alpha, |
| ptr_input, nInputDepth, nInputRows, nInputCols, |
| ptr_weight, nKernelDepth, nKernelRows, nKernelCols, |
| sdepth, srow, scol); |
| else |
| THTensor_(validConv3Dptr)(output_data, |
| alpha, |
| ptr_input, nInputDepth, nInputRows, nInputCols, |
| ptr_weight, nKernelDepth, nKernelRows, nKernelCols, |
| sdepth, srow, scol); |
| } |
| |
| long THTensor_(convsize)(long x, long k, long s, const char* vf) |
| { |
| THArgCheck(*vf == 'V' || *vf == 'F', 1, "type of convolution can be 'V' or 'F'"); |
| if (*vf == 'V') |
| return (x-k)/s + 1; |
| else |
| return (x-1)*s + k; |
| } |
| |
| |
| /* |
| 3D input, 3D kernel, 4D output |
| like rank1 update |
| A <- xx' + beta*A |
| for sr,sc=1 this is equivalent to conv2Dger, but otherwise it is useful for |
| calculating derivatives wrt a kernel that is applied with stride sr,sc != 1 |
| */ |
| void THTensor_(conv2DRevger)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol) |
| { |
| long nInputPlane, nInputRows, nInputCols; |
| long nKernelPlane, nKernelRows, nKernelCols; |
| long nOutputPlane, nOutputRows, nOutputCols; |
| long istride0, kstride0; |
| THTensor *input; |
| THTensor *kernel; |
| real *input_data; |
| real *weight_data; |
| real *output_data; |
| long nelem; |
| long k; |
| |
| THArgCheck(t_->nDimension == 3 , 3, "input: 3D Tensor expected"); |
| THArgCheck(k_->nDimension == 3 , 4, "kernel: 3D Tensor expected"); |
| THArgCheck(srow >= 1, 5, "Stride should be a positive integer"); |
| THArgCheck(scol >= 1, 6, "Stride should be a positive integer"); |
| |
| input = THTensor_(newContiguous)(t_); |
| kernel = THTensor_(newContiguous)(k_); |
| |
| nInputPlane = input->size[0]; |
| istride0 = input->stride[0]; |
| nInputRows = input->size[1]; |
| nInputCols = input->size[2]; |
| |
| kstride0 = kernel->stride[0]; |
| nKernelPlane = kernel->size[0]; |
| nKernelRows = kernel->size[1]; |
| nKernelCols = kernel->size[2]; |
| nOutputPlane = nInputPlane * kernel->size[0]; |
| |
| THArgCheck(nInputRows >= nKernelRows && nInputCols >= nKernelCols , 2, "covn2DRevger : Input image is smaller than kernel"); |
| |
| nOutputRows = nInputRows - (nKernelRows - 1) * srow; |
| nOutputCols = nInputCols - (nKernelCols - 1) * scol; |
| |
| nelem = THTensor_(nElement)(r_); |
| THTensor_(resize4d)(r_,nKernelPlane, nInputPlane, nOutputRows, nOutputCols); |
| |
| input_data = THTensor_(data)(input); |
| weight_data = THTensor_(data)(kernel); |
| output_data = THTensor_(data)(r_); |
| |
| if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_)) |
| { |
| /*THTensor_(zero)(r_);*/ |
| |
| #pragma omp parallel for private(k) |
| for (k = 0; k < r_->size[0]*r_->size[1]; k++) |
| { |
| real* ptr_output = output_data + k*nOutputCols*nOutputRows; |
| long l; |
| for (l = 0; l < nOutputRows*nOutputCols; l++) |
| ptr_output[l] = 0.0; |
| } |
| } |
| else if (beta != 1) |
| { |
| /*THTensor_(mul)(r_, beta);*/ |
| #pragma omp parallel for private(k) |
| for (k = 0; k < r_->size[0]*r_->size[1]; k++) |
| { |
| real* ptr_output = output_data + k*nOutputCols*nOutputRows; |
| long l; |
| for (l = 0; l < nOutputRows*nOutputCols; l++) |
| ptr_output[l] *= beta; |
| } |
| } |
| |
| #pragma omp parallel for private(k) |
| for(k = 0; k < nKernelPlane; k++) |
| { |
| long i; |
| /* get kernel */ |
| real *ptr_weight = weight_data+k*kstride0; |
| |
| for(i = 0; i < nInputPlane; i++) |
| { |
| /* get output */ |
| real *ptr_output = output_data + k*nInputPlane*nOutputCols*nOutputRows + i*nOutputCols*nOutputRows; |
| /* get input */ |
| real *ptr_input = input_data+i*istride0; |
| |
| /* do image, kernel convolution */ |
| THTensor_(validXCorr2DRevptr)(ptr_output, |
| alpha, |
| ptr_input, nInputRows, nInputCols, |
| ptr_weight, nKernelRows, nKernelCols, |
| srow, scol); |
| /* Next output plane */ |
| /* output_data += nOutputCols*nOutputRows; */ |
| } |
| } |
| THTensor_(free)(input); |
| THTensor_(free)(kernel); |
| } |
| |
| |
| /* |
| 3D input, 3D kernel, 4D output |
| like rank1 update |
| A <- xx' + beta*A |
| for sr,sc=1 this is equivalent to conv2Dger, but otherwise it is useful for |
| calculating derivatives wrt a kernel that is applied with stride sr,sc != 1 |
| */ |
| void THTensor_(conv2DRevgerm)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol) |
| { |
| long nbatch, nInputPlane, nInputRows, nInputCols; |
| long nKernelPlane, nKernelRows, nKernelCols; |
| long nOutputRows, nOutputCols; |
| long istride0, kstride0, istride1, kstride1; |
| THTensor *input; |
| THTensor *kernel; |
| real *input_data; |
| real *weight_data; |
| real *output_data; |
| long nelem; |
| long k; |
| |
| THArgCheck(t_->nDimension == 4 , 3, "input: 4D Tensor expected"); |
| THArgCheck(k_->nDimension == 4 , 4, "kernel: 4D Tensor expected"); |
| THArgCheck(srow >= 1, 5, "Stride should be a positive integer"); |
| THArgCheck(scol >= 1, 6, "Stride should be a positive integer"); |
| |
| input = THTensor_(newContiguous)(t_); |
| kernel = THTensor_(newContiguous)(k_); |
| |
| istride0 = input->stride[0]; |
| istride1 = input->stride[1]; |
| nbatch = input->size[0]; |
| nInputPlane = input->size[1]; |
| nInputRows = input->size[2]; |
| nInputCols = input->size[3]; |
| |
| kstride0 = kernel->stride[0]; |
| kstride1 = kernel->stride[1]; |
| nKernelPlane = kernel->size[1]; |
| nKernelRows = kernel->size[2]; |
| nKernelCols = kernel->size[3]; |
| |
| THArgCheck(nInputRows >= nKernelRows && nInputCols >= nKernelCols , 2, "conv2DRevger : Input image is smaller than kernel"); |
| THArgCheck(kernel->size[0] == input->size[0] , 2, "conv2DRevger : Input batch and kernel batch is not same size"); |
| |
| nOutputRows = nInputRows - (nKernelRows - 1) * srow; |
| nOutputCols = nInputCols - (nKernelCols - 1) * scol; |
| |
| nelem = THTensor_(nElement)(r_); |
| THTensor_(resize4d)(r_,nKernelPlane, nInputPlane, nOutputRows, nOutputCols); |
| |
| input_data = THTensor_(data)(input); |
| weight_data = THTensor_(data)(kernel); |
| output_data = THTensor_(data)(r_); |
| |
| if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_)) |
| { |
| /*THTensor_(zero)(r_);*/ |
| |
| #pragma omp parallel for private(k) |
| for (k = 0; k < r_->size[0]*r_->size[1]; k++) |
| { |
| real* ptr_output = output_data + k*nOutputCols*nOutputRows; |
| long l; |
| for (l = 0; l < nOutputRows*nOutputCols; l++) |
| ptr_output[l] = 0.0; |
| } |
| } |
| else if (beta != 1) |
| { |
| /*THTensor_(mul)(r_, beta);*/ |
| #pragma omp parallel for private(k) |
| for (k = 0; k < r_->size[0]*r_->size[1]; k++) |
| { |
| real* ptr_output = output_data + k*nOutputCols*nOutputRows; |
| long l; |
| for (l = 0; l < nOutputRows*nOutputCols; l++) |
| ptr_output[l] *= beta; |
| } |
| } |
| |
| #pragma omp parallel for private(k) |
| for(k = 0; k < nKernelPlane; k++) |
| { |
| long i; |
| for(i = 0; i < nInputPlane; i++) |
| { |
| long p; |
| for(p = 0; p < nbatch; p++) |
| { |
| /* get kernel */ |
| real *ptr_weight = weight_data + p*kstride0 + k*kstride1; |
| /* get output */ |
| real *ptr_output = output_data + k*nInputPlane*nOutputCols*nOutputRows + i*nOutputCols*nOutputRows; |
| /* get input */ |
| real *ptr_input = input_data + p*istride0 + i*istride1; |
| |
| /* do image, kernel convolution */ |
| THTensor_(validXCorr2DRevptr)(ptr_output, |
| alpha, |
| ptr_input, nInputRows, nInputCols, |
| ptr_weight, nKernelRows, nKernelCols, |
| srow, scol); |
| /* Next output plane */ |
| /* output_data += nOutputCols*nOutputRows; */ |
| } |
| } |
| } |
| THTensor_(free)(input); |
| THTensor_(free)(kernel); |
| } |
| |
| |
| /* |
| 3D input, 3D kernel, 4D output |
| like rank1 update |
| A <- xx' + beta*A |
| */ |
| void THTensor_(conv2Dger)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol, const char *vf, const char *xc) |
| { |
| long nInputPlane, nInputRows, nInputCols; |
| long nKernelPlane, nKernelRows, nKernelCols; |
| long nOutputPlane, nOutputRows, nOutputCols; |
| long istride0, kstride0; |
| |
| THTensor *input; |
| THTensor *kernel; |
| real *input_data; |
| real *weight_data; |
| real *output_data; |
| long nelem; |
| long k; |
| |
| THArgCheck(t_->nDimension == 3 , 3, "input: 3D Tensor expected"); |
| THArgCheck(k_->nDimension == 3 , 4, "kernel: 3D Tensor expected"); |
| THArgCheck(srow >= 1, 5, "Stride should be a positive integer"); |
| THArgCheck(scol >= 1, 6, "Stride should be a positive integer"); |
| THArgCheck(*vf == 'V' || *vf == 'F', 7, "type of convolution can 'V' or 'F'"); |
| THArgCheck(*xc == 'C' || *xc == 'X', 7, "type of convolution can 'X' or 'C'"); |
| |
| input = THTensor_(newContiguous)(t_); |
| kernel = THTensor_(newContiguous)(k_); |
| |
| nInputPlane = input->size[0]; |
| istride0 = input->stride[0]; |
| nInputRows = input->size[1]; |
| nInputCols = input->size[2]; |
| |
| kstride0 = kernel->stride[0]; |
| nKernelPlane = kernel->size[0]; |
| nKernelRows = kernel->size[1]; |
| nKernelCols = kernel->size[2]; |
| nOutputPlane = nInputPlane * kernel->size[0]; |
| |
| THArgCheck((nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf == 'F', 2, "conv2Dger : Input image is smaller than kernel"); |
| |
| if (*vf == 'F') { |
| nOutputRows = (nInputRows - 1) * srow + nKernelRows; |
| nOutputCols = (nInputCols - 1) * scol + nKernelCols; |
| } else { /* valid */ |
| nOutputRows = (nInputRows - nKernelRows) / srow + 1; |
| nOutputCols = (nInputCols - nKernelCols) / scol + 1; |
| } |
| |
| nelem = THTensor_(nElement)(r_); |
| THTensor_(resize4d)(r_, nKernelPlane, nInputPlane, nOutputRows, nOutputCols); |
| |
| input_data = THTensor_(data)(input); |
| weight_data = THTensor_(data)(kernel); |
| output_data = THTensor_(data)(r_); |
| |
| if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_)) |
| { |
| /*THTensor_(zero)(r_);*/ |
| #pragma omp parallel for private(k) |
| for (k = 0; k < r_->size[0]*r_->size[1]; k++) |
| { |
| real* ptr_output = output_data + k*nOutputCols*nOutputRows; |
| long l; |
| for (l = 0; l < nOutputRows*nOutputCols; l++) |
| ptr_output[l] = 0.0; |
| } |
| } |
| else if (beta != 1) |
| { |
| /*THTensor_(mul)(r_, beta);*/ |
| #pragma omp parallel for private(k) |
| for (k = 0; k < r_->size[0]*r_->size[1]; k++) |
| { |
| real* ptr_output = output_data + k*nOutputCols*nOutputRows; |
| long l; |
| for (l = 0; l < nOutputRows*nOutputCols; l++) |
| ptr_output[l] *= beta; |
| } |
| } |
| |
| #pragma omp parallel for private(k) |
| for(k = 0; k < nKernelPlane; k++) |
| { |
| long i; |
| /* get kernel */ |
| real *ptr_weight = weight_data+k*kstride0; |
| |
| for(i = 0; i < nInputPlane; i++) |
| { |
| /* get output */ |
| real *ptr_output = output_data + k*nInputPlane*nOutputCols*nOutputRows + i*nOutputCols*nOutputRows; |
| /* get input */ |
| real *ptr_input = input_data+i*istride0; |
| |
| /* do image, kernel convolution */ |
| if (*vf == 'F') |
| if (*xc == 'X') |
| THTensor_(fullXCorr2Dptr)(ptr_output, |
| alpha, |
| ptr_input, nInputRows, nInputCols, |
| ptr_weight, nKernelRows, nKernelCols, |
| srow, scol); |
| else |
| THTensor_(fullConv2Dptr)(ptr_output, |
| alpha, |
| ptr_input, nInputRows, nInputCols, |
| ptr_weight, nKernelRows, nKernelCols, |
| srow, scol); |
| else |
| if (*xc == 'X') |
| THTensor_(validXCorr2Dptr)(ptr_output, |
| alpha, |
| ptr_input, nInputRows, nInputCols, |
| ptr_weight, nKernelRows, nKernelCols, |
| srow, scol); |
| else |
| THTensor_(validConv2Dptr)(ptr_output, |
| alpha, |
| ptr_input, nInputRows, nInputCols, |
| ptr_weight, nKernelRows, nKernelCols, |
| srow, scol); |
| /* Next output plane */ |
| /* output_data += nOutputCols*nOutputRows; */ |
| } |
| } |
| THTensor_(free)(input); |
| THTensor_(free)(kernel); |
| } |
| |
| |
| /* |
| 3D input, 4D kernel, 3D output |
| matrix vector product like |
| y <- Ax + beta*y |
| */ |
| void THTensor_(conv2Dmv)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol, const char *vf, const char *xc) |
| { |
| long nInputPlane, nInputRows, nInputCols; |
| long nKernelRows, nKernelCols; |
| long nOutputPlane, nOutputRows, nOutputCols; |
| long istride0, kstride0, kstride1; |
| THTensor *input; |
| THTensor* kernel; |
| real *input_data; |
| real *weight_data; |
| real *output_data; |
| long nelem; |
| long k; |
| |
| THArgCheck(t_->nDimension == 3 , 3, "input: 3D Tensor expected"); |
| THArgCheck(k_->nDimension == 4 , 4, "kernel: 4D Tensor expected"); |
| THArgCheck(srow >= 1, 5, "Stride should be a positive integer"); |
| THArgCheck(scol >= 1, 6, "Stride should be a positive integer"); |
| THArgCheck(*vf == 'V' || *vf == 'F', 7, "type of convolution can 'V' or 'F'"); |
| THArgCheck(*xc == 'C' || *xc == 'X', 7, "type of convolution can 'X' or 'C'"); |
| |
| input = THTensor_(newContiguous)(t_); |
| if (!(k_->stride[3] == 1) || !(k_->stride[2] == k_->size[3])) { |
| kernel = THTensor_(newContiguous)(k_); |
| } else { |
| THTensor_(retain)(k_); |
| kernel = k_; |
| } |
| |
| nInputPlane = input->size[0]; |
| istride0 = input->stride[0]; |
| nInputRows = input->size[1]; |
| nInputCols = input->size[2]; |
| |
| kstride0 = kernel->stride[0]; |
| kstride1 = kernel->stride[1]; |
| nKernelRows = kernel->size[2]; |
| nKernelCols = kernel->size[3]; |
| nOutputPlane = kernel->size[0]; |
| THArgCheck(kernel->size[1] == nInputPlane, 2, "invalid number of input planes"); |
| |
| THArgCheck( (nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf == 'F', 2, "conv2Dmv : Input image is smaller than kernel"); |
| |
| if (*vf == 'F') { |
| nOutputRows = (nInputRows - 1) * srow + nKernelRows; |
| nOutputCols = (nInputCols - 1) * scol + nKernelCols; |
| } else { /* valid */ |
| nOutputRows = (nInputRows - nKernelRows) / srow + 1; |
| nOutputCols = (nInputCols - nKernelCols) / scol + 1; |
| } |
| |
| nelem = THTensor_(nElement)(r_); |
| THTensor_(resize3d)(r_, nOutputPlane, nOutputRows, nOutputCols); |
| |
| input_data = THTensor_(data)(input); |
| weight_data = THTensor_(data)(kernel); |
| output_data = THTensor_(data)(r_); |
| |
| if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_)) |
| { |
| /*THTensor_(zero)(r_);*/ |
| #pragma omp parallel for private(k) |
| for (k = 0; k < r_->size[0]; k++) |
| { |
| real* ptr_output = output_data + k*nOutputCols*nOutputRows; |
| long l; |
| for (l = 0; l < nOutputRows*nOutputCols; l++) |
| ptr_output[l] = 0.0; |
| } |
| } |
| else if (beta != 1) |
| { |
| /*THTensor_(mul)(r_, beta);*/ |
| #pragma omp parallel for private(k) |
| for (k = 0; k < r_->size[0]; k++) |
| { |
| real* ptr_output = output_data + k*nOutputCols*nOutputRows; |
| long l; |
| for (l = 0; l < nOutputRows*nOutputCols; l++) |
| ptr_output[l] *= beta; |
| } |
| } |
| |
| #pragma omp parallel for private(k) |
| for(k = 0; k < nOutputPlane; k++) |
| { |
| long i; |
| /* get output */ |
| real *ptr_output = output_data + k*nOutputCols*nOutputRows; |
| for(i = 0; i < nInputPlane; i++) |
| { |
| /* get kernel */ |
| real *ptr_weight = weight_data + k*kstride0 + i*kstride1; |
| /* get input */ |
| real *ptr_input = input_data + i*istride0; |
| |
| /* do image, kernel convolution */ |
| if (*vf == 'F') |
| if (*xc == 'X') |
| THTensor_(fullXCorr2Dptr)(ptr_output, |
| alpha, |
| ptr_input, nInputRows, nInputCols, |
| ptr_weight, nKernelRows, nKernelCols, |
| srow, scol); |
| else |
| THTensor_(fullConv2Dptr)(ptr_output, |
| alpha, |
| ptr_input, nInputRows, nInputCols, |
| ptr_weight, nKernelRows, nKernelCols, |
| srow, scol); |
| else |
| if (*xc == 'X') |
| THTensor_(validXCorr2Dptr)(ptr_output, |
| alpha, |
| ptr_input, nInputRows, nInputCols, |
| ptr_weight, nKernelRows, nKernelCols, |
| srow, scol); |
| else |
| THTensor_(validConv2Dptr)(ptr_output, |
| alpha, |
| ptr_input, nInputRows, nInputCols, |
| ptr_weight, nKernelRows, nKernelCols, |
| srow, scol); |
| } |
| /* Next output plane */ |
| /* output_data += nOutputCols*nOutputRows;*/ |
| } |
| THTensor_(free)(input); |
| THTensor_(free)(kernel); |
| } |
| |
| |
| /* |
| 3D input, 4D kernel, 3D output |
| matrix vector product like |
| y <- Ax + beta*y |
| */ |
| void THTensor_(conv2Dmm)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol, const char *vf, const char *xc) |
| { |
| long nInputPlane, nInputRows, nInputCols; |
| long nKernelRows, nKernelCols; |
| long nOutputPlane, nOutputRows, nOutputCols; |
| long kstride0, kstride1; |
| THTensor *input; |
| THTensor* kernel; |
| long nbatch; |
| long nelem; |
| real *input_data; |
| real *weight_data; |
| real *output_data; |
| long p; |
| |
| THArgCheck(t_->nDimension == 4 , 3, "input: 4D Tensor expected"); |
| THArgCheck(k_->nDimension == 4 , 4, "kernel: 4D Tensor expected"); |
| THArgCheck(srow >= 1, 5, "Stride should be a positive integer"); |
| THArgCheck(scol >= 1, 6, "Stride should be a positive integer"); |
| THArgCheck(*vf == 'V' || *vf == 'F', 7, "type of convolution can 'V' or 'F'"); |
| THArgCheck(*xc == 'C' || *xc == 'X', 7, "type of convolution can 'X' or 'C'"); |
| |
| input = THTensor_(newContiguous)(t_); |
| if (!(k_->stride[3] == 1) || !(k_->stride[2] == k_->size[3])) { |
| kernel = THTensor_(newContiguous)(k_); |
| } else { |
| THTensor_(retain)(k_); |
| kernel = k_; |
| } |
| |
| nbatch = input->size[0]; |
| nInputPlane = input->size[1]; |
| nInputRows = input->size[2]; |
| nInputCols = input->size[3]; |
| |
| kstride0 = kernel->stride[0]; |
| kstride1 = kernel->stride[1]; |
| nKernelRows = kernel->size[2]; |
| nKernelCols = kernel->size[3]; |
| nOutputPlane = kernel->size[0]; |
| THArgCheck(kernel->size[1] == nInputPlane, 2, "invalid number of input planes"); |
| |
| THArgCheck( (nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf == 'F', 2, "conv2Dmv : Input image is smaller than kernel"); |
| |
| if (*vf == 'F') { |
| nOutputRows = (nInputRows - 1) * srow + nKernelRows; |
| nOutputCols = (nInputCols - 1) * scol + nKernelCols; |
| } else { /* valid */ |
| nOutputRows = (nInputRows - nKernelRows) / srow + 1; |
| nOutputCols = (nInputCols - nKernelCols) / scol + 1; |
| } |
| |
| nelem = THTensor_(nElement)(r_); |
| THTensor_(resize4d)(r_, nbatch, nOutputPlane, nOutputRows, nOutputCols); |
| |
| input_data = THTensor_(data)(input); |
| weight_data = THTensor_(data)(kernel); |
| output_data = THTensor_(data)(r_); |
| |
| if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_)) |
| { |
| /*THTensor_(zero)(r_);*/ |
| #pragma omp parallel for private(p) |
| for (p=0; p < r_->size[0]; p++) |
| { |
| long k; |
| for (k = 0; k < r_->size[1]; k++) |
| { |
| real* ptr_output = output_data + p*nOutputPlane*nOutputRows*nOutputCols + k*nOutputCols*nOutputRows; |
| long l; |
| for (l = 0; l < nOutputRows*nOutputCols; l++) |
| ptr_output[l] = 0.0; |
| } |
| } |
| } |
| else if (beta != 1) |
| { |
| /*THTensor_(mul)(r_, beta);*/ |
| #pragma omp parallel for private(p) |
| for(p=0; p < r_->size[0]; p++) |
| { |
| long k; |
| for (k = 0; k < r_->size[1]; k++) |
| { |
| real* ptr_output = output_data + p*nOutputPlane*nOutputRows*nOutputCols + k*nOutputCols*nOutputRows; |
| long l; |
| for (l = 0; l < nOutputRows*nOutputCols; l++) |
| ptr_output[l] *= beta; |
| } |
| } |
| } |
| |
| #pragma omp parallel for private(p) |
| for(p=0; p < nbatch; p++) |
| { |
| long k; |
| for(k = 0; k < nOutputPlane; k++) |
| { |
| long i; |
| /* get output */ |
| real *ptr_output = output_data + p*nOutputPlane*nOutputCols*nOutputRows + k*nOutputCols*nOutputRows; |
| for(i = 0; i < nInputPlane; i++) |
| { |
| /* get kernel */ |
| real *ptr_weight = weight_data + k*kstride0 + i*kstride1; |
| /* get input */ |
| real *ptr_input = input_data + p*nInputPlane*nInputRows*nInputCols + i*nInputRows*nInputCols; |
| |
| /* do image, kernel convolution */ |
| if (*vf == 'F') |
| if (*xc == 'X') |
| THTensor_(fullXCorr2Dptr)(ptr_output, |
| alpha, |
| ptr_input, nInputRows, nInputCols, |
| ptr_weight, nKernelRows, nKernelCols, |
| srow, scol); |
| else |
| THTensor_(fullConv2Dptr)(ptr_output, |
| alpha, |
| ptr_input, nInputRows, nInputCols, |
| ptr_weight, nKernelRows, nKernelCols, |
| srow, scol); |
| else |
| if (*xc == 'X') |
| THTensor_(validXCorr2Dptr)(ptr_output, |
| alpha, |
| ptr_input, nInputRows, nInputCols, |
| ptr_weight, nKernelRows, nKernelCols, |
| srow, scol); |
| else |
| THTensor_(validConv2Dptr)(ptr_output, |
| alpha, |
| ptr_input, nInputRows, nInputCols, |
| ptr_weight, nKernelRows, nKernelCols, |
| srow, scol); |
| } |
| /* Next output plane */ |
| /* output_data += nOutputCols*nOutputRows;*/ |
| } |
| } |
| THTensor_(free)(input); |
| THTensor_(free)(kernel); |
| } |
| |
| |
| /* |
| 2D input, 2D kernel, 2D output |
| scalar multiplication like |
| y <- x*y + beta*y |
| */ |
| void THTensor_(conv2Dmul)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol, const char *vf, const char *xc) |
| { |
| THTensor *input; |
| THTensor* kernel; |
| long nInputRows; |
| long nInputCols; |
| long nKernelRows; |
| long nKernelCols; |
| long nOutputRows, nOutputCols; |
| real *ptr_input; |
| real *ptr_weight; |
| real *output_data; |
| long nelem; |
| |
| THArgCheck(t_->nDimension == 2 , 3, "input: 2D Tensor expected"); |
| THArgCheck(k_->nDimension == 2 , 4, "kernel: 2D Tensor expected"); |
| THArgCheck(srow >= 1, 5, "Stride should be a positive integer"); |
| THArgCheck(scol >= 1, 6, "Stride should be a positive integer"); |
| |
| input = THTensor_(newContiguous)(t_); |
| kernel = THTensor_(newContiguous)(k_); |
| |
| nInputRows = input->size[0]; |
| nInputCols = input->size[1]; |
| nKernelRows = kernel->size[0]; |
| nKernelCols = kernel->size[1]; |
| |
| THArgCheck((nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf == 'F', 2, "conv2Dmul : Input image is smaller than kernel"); |
| |
| nOutputRows = THTensor_(convsize)(nInputRows, nKernelRows, srow, vf); |
| nOutputCols = THTensor_(convsize)(nInputCols, nKernelCols, scol, vf); |
| |
| nelem = THTensor_(nElement)(r_); |
| THTensor_(resize2d)(r_, nOutputRows, nOutputCols); |
| if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_)) |
| THTensor_(zero)(r_); |
| else if (beta != 1) |
| THTensor_(mul)(r_, r_, beta); |
| |
| ptr_input = THTensor_(data)(input); |
| ptr_weight = THTensor_(data)(kernel); |
| output_data = THTensor_(data)(r_); |
| |
| |
| /* do image, kernel convolution */ |
| THTensor_(conv2d)(output_data, |
| alpha, |
| ptr_input, nInputRows, nInputCols, |
| ptr_weight, nKernelRows, nKernelCols, |
| srow, scol, vf, xc); |
| THTensor_(free)(input); |
| THTensor_(free)(kernel); |
| } |
| |
| /* |
| 3D input, 3D kernel, 3D output |
| component wise multiplication like |
| y <- y.*x + beta*y |
| */ |
| void THTensor_(conv2Dcmul)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol, const char *vf, const char *xc) |
| { |
| long nInputPlane, nInputRows, nInputCols; |
| long nKernelRows, nKernelCols; |
| long nOutputPlane, nOutputRows, nOutputCols; |
| long istride0, kstride0; |
| THTensor *input; |
| THTensor *kernel; |
| real *input_data; |
| real *weight_data; |
| real *output_data; |
| long nelem; |
| long k; |
| |
| THArgCheck(t_->nDimension == 3 , 3, "input: 3D Tensor expected"); |
| THArgCheck(k_->nDimension == 3 , 4, "kernel: 3D Tensor expected"); |
| THArgCheck(srow >= 1, 5, "Stride should be a positive integer"); |
| THArgCheck(scol >= 1, 6, "Stride should be a positive integer"); |
| |
| input = THTensor_(newContiguous)(t_); |
| kernel = THTensor_(newContiguous)(k_); |
| |
| istride0 = input->stride[0]; |
| nInputPlane = input->size[0]; |
| nInputRows = input->size[1]; |
| nInputCols = input->size[2]; |
| |
| kstride0 = kernel->stride[0]; |
| nOutputPlane = kernel->size[0]; |
| nKernelRows = kernel->size[1]; |
| nKernelCols = kernel->size[2]; |
| |
| THArgCheck(nOutputPlane == nInputPlane, 2, "invalid number of input/kernel planes"); |
| THArgCheck( (nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf == 'F', 2, "conv2Dcmul : Input image is smaller than kernel"); |
| |
| nOutputRows = THTensor_(convsize)(nInputRows, nKernelRows, srow, vf); |
| nOutputCols = THTensor_(convsize)(nInputCols, nKernelCols, scol, vf); |
| |
| nelem = THTensor_(nElement)(r_); |
| THTensor_(resize3d)(r_, nOutputPlane, nOutputRows, nOutputCols); |
| |
| if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_)) |
| { |
| THTensor_(zero)(r_); |
| } |
| else if (beta != 1) |
| THTensor_(mul)(r_, r_, beta); |
| |
| input_data = THTensor_(data)(input); |
| weight_data = THTensor_(data)(kernel); |
| output_data = THTensor_(data)(r_); |
| |
| for(k = 0; k < nOutputPlane; k++) |
| { |
| /* get kernel */ |
| real *ptr_weight = weight_data + k*kstride0; |
| /* get input */ |
| real *ptr_input = input_data + k*istride0; |
| |
| /* do image, kernel convolution */ |
| THTensor_(conv2d)(output_data, |
| alpha, |
| ptr_input, nInputRows, nInputCols, |
| ptr_weight, nKernelRows, nKernelCols, |
| srow, scol, vf, xc); |
| /* Next output plane */ |
| output_data += nOutputCols*nOutputRows; |
| } |
| THTensor_(free)(input); |
| THTensor_(free)(kernel); |
| } |
| |
| /* |
| 3D input, 3D kernel, 3D output |
| component wise multiplication like with a permutation map |
| y <- y.*x + beta*y |
| */ |
| void THTensor_(conv2Dmap)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, THTensor *map, long srow, long scol, const char *vf, const char *xc) |
| { |
| long nInputPlane, nInputRows, nInputCols; |
| long nKernelRows, nKernelCols; |
| long nOutputPlane, nOutputRows, nOutputCols; |
| long istride0, kstride0; |
| THTensor *input; |
| THTensor* kernel; |
| real *input_data; |
| real *weight_data; |
| real *output_data; |
| long nmaps; |
| long nelem; |
| long k; |
| |
| THArgCheck(t_->nDimension == 3 , 3, "input: 3D Tensor expected"); |
| THArgCheck(k_->nDimension == 3 , 4, "kernel: 3D Tensor expected"); |
| THArgCheck(map->nDimension == 2 , 4, "map: 2D Tensor expected"); |
| THArgCheck(srow >= 1, 6, "Stride should be a positive integer"); |
| THArgCheck(scol >= 1, 7, "Stride should be a positive integer"); |
| |
| input = THTensor_(newContiguous)(t_); |
| kernel = THTensor_(newContiguous)(k_); |
| |
| istride0 = input->stride[0]; |
| nInputPlane = input->size[0]; |
| nInputRows = input->size[1]; |
| nInputCols = input->size[2]; |
| |
| kstride0 = kernel->stride[0]; |
| nOutputPlane = kernel->size[0]; |
| nKernelRows = kernel->size[1]; |
| nKernelCols = kernel->size[2]; |
| |
| THArgCheck(nOutputPlane == nInputPlane, 2, "invalid number of input/kernel planes"); |
| THArgCheck( (nInputRows >= nKernelRows && nInputCols >= nKernelCols) |
| || *vf == 'F', 2, "conv2Dmap : Input image is smaller than kernel"); |
| |
| nOutputRows = THTensor_(convsize)(nInputRows, nKernelRows, srow, vf); |
| nOutputCols = THTensor_(convsize)(nInputCols, nKernelCols, scol, vf); |
| |
| nelem = THTensor_(nElement)(r_); |
| THTensor_(resize3d)(r_, nOutputPlane, nOutputRows, nOutputCols); |
| |
| if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_)) |
| { |
| THTensor_(zero)(r_); |
| } |
| else if (beta != 1) |
| THTensor_(mul)(r_, r_, beta); |
| |
| input_data = THTensor_(data)(input); |
| weight_data = THTensor_(data)(kernel); |
| output_data = THTensor_(data)(r_); |
| |
| nmaps = map->size[0]; |
| |
| for(k = 0; k < nmaps; k++) |
| { |
| /* get indices */ |
| long from = (long)THTensor_(get2d)(map,k,0)-1; |
| long to = (long)THTensor_(get2d)(map,k,1)-1; |
| |
| /* get kernel */ |
| real *ptr_weight = weight_data + k*kstride0; |
| /* get input */ |
| real *ptr_input = input_data + from*istride0; |
| /* get output */ |
| real *ptr_output = output_data + to*nOutputRows*nOutputCols; |
| |
| /* do image, kernel convolution */ |
| THTensor_(conv2d)(ptr_output, |
| alpha, |
| ptr_input, nInputRows, nInputCols, |
| ptr_weight, nKernelRows, nKernelCols, |
| srow, scol, vf, xc); |
| } |
| THTensor_(free)(input); |
| THTensor_(free)(kernel); |
| } |
| |
| /* |
| 4D input, 4D kernel, 5D output |
| like rank1 update |
| A <- xx' + beta*A |
| for sr,sc=1 this is equivalent to xcorr2Dger, but otherwise it is useful for |
| calculating derivatives wrt a kernel that is applied with stride sr,sc != 1 |
| */ |
| void THTensor_(conv3DRevger)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, |
| long sdepth, long srow, long scol) |
| { |
| long nInputPlane, nInputDepth, nInputRows, nInputCols; |
| long nKernelPlane, nKernelDepth, nKernelRows, nKernelCols; |
| long nOutputPlane, nOutputDepth, nOutputRows, nOutputCols; |
| long istride0, kstride0; |
| THTensor *input; |
| THTensor *kernel; |
| real *input_data; |
| real *weight_data; |
| real *output_data; |
| long nelem; |
| long k, i; |
| |
| THArgCheck(t_->nDimension == 4 , 3, "input: 4D Tensor expected"); |
| THArgCheck(k_->nDimension == 4 , 4, "kernel: 4D Tensor expected"); |
| THArgCheck(sdepth >= 1, 5, "Stride should be a positive integer"); |
| THArgCheck(srow >= 1, 6, "Stride should be a positive integer"); |
| THArgCheck(scol >= 1, 7, "Stride should be a positive integer"); |
| |
| input = THTensor_(newContiguous)(t_); |
| kernel = THTensor_(newContiguous)(k_); |
| |
| nInputPlane = input->size[0]; |
| istride0 = input->stride[0]; |
| nInputDepth = input->size[1]; |
| nInputRows = input->size[2]; |
| nInputCols = input->size[3]; |
| |
| kstride0 = kernel->stride[0]; |
| nKernelPlane = kernel->size[0]; |
| nKernelDepth= kernel->size[1]; |
| nKernelRows = kernel->size[2]; |
| nKernelCols = kernel->size[3]; |
| nOutputPlane = nInputPlane * kernel->size[0]; |
| |
| THArgCheck(nInputDepth >= nKernelDepth && nInputRows >= nKernelRows && nInputCols >= nKernelCols , 2, "conv3DRevger : Input image is smaller than kernel"); |
| |
| nOutputDepth = nInputDepth - (nKernelDepth - 1) * sdepth; |
| nOutputRows = nInputRows - (nKernelRows - 1) * srow; |
| nOutputCols = nInputCols - (nKernelCols - 1) * scol; |
| |
| nelem = THTensor_(nElement)(r_); |
| THTensor_(resize5d)(r_,nKernelPlane, nInputPlane, nOutputDepth, nOutputRows, nOutputCols); |
| |
| if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_)) |
| { |
| THTensor_(zero)(r_); |
| } |
| else if (beta != 1) |
| THTensor_(mul)(r_, r_, beta); |
| |
| input_data = THTensor_(data)(input); |
| weight_data = THTensor_(data)(kernel); |
| output_data = THTensor_(data)(r_); |
| |
| for(k = 0; k < nKernelPlane; k++) |
| { |
| /* get kernel */ |
| real *ptr_weight = weight_data+k*kstride0; |
| |
| for(i = 0; i < nInputPlane; i++) |
| { |
| /* get input */ |
| real *ptr_input = input_data+i*istride0; |
| |
| /* do image, kernel convolution */ |
| THTensor_(validXCorr3DRevptr)(output_data, |
| alpha, |
| ptr_input, nInputDepth, nInputRows, nInputCols, |
| ptr_weight, nKernelDepth, nKernelRows, nKernelCols, |
| sdepth, srow, scol); |
| /* Next output plane */ |
| output_data += nOutputDepth*nOutputCols*nOutputRows; |
| } |
| } |
| THTensor_(free)(input); |
| THTensor_(free)(kernel); |
| } |
| |
| |
| /* |
| 4D input, 4D kernel, 5D output |
| like rank1 update |
| A <- xx' + beta*A |
| */ |
| void THTensor_(conv3Dger)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, |
| long sdepth, long srow, long scol, const char *vf, const char *xc) |
| { |
| long nInputPlane, nInputDepth, nInputRows, nInputCols; |
| long nKernelPlane, nKernelDepth, nKernelRows, nKernelCols; |
| long nOutputPlane, nOutputDepth, nOutputRows, nOutputCols; |
| long istride0, kstride0; |
| THTensor *input; |
| THTensor *kernel; |
| real *input_data; |
| real *weight_data; |
| real *output_data; |
| long nelem; |
| long k, i; |
| |
| THArgCheck(t_->nDimension == 4 , 3, "input: 4D Tensor expected"); |
| THArgCheck(k_->nDimension == 4 , 4, "kernel: 4D Tensor expected"); |
| THArgCheck(sdepth >= 1, 5, "Stride should be a positive integer"); |
| THArgCheck(srow >= 1, 6, "Stride should be a positive integer"); |
| THArgCheck(scol >= 1, 7, "Stride should be a positive integer"); |
| THArgCheck(*vf == 'V' || *vf == 'F', 8, "type of convolution can 'V' or 'F'"); |
| THArgCheck(*xc == 'C' || *xc == 'X', 8, "type of convolution can 'X' or 'C'"); |
| |
| input = THTensor_(newContiguous)(t_); |
| kernel = THTensor_(newContiguous)(k_); |
| |
| nInputPlane = input->size[0]; |
| istride0 = input->stride[0]; |
| nInputDepth = input->size[1]; |
| nInputRows = input->size[2]; |
| nInputCols = input->size[3]; |
| |
| kstride0 = kernel->stride[0]; |
| nKernelPlane = kernel->size[0]; |
| nKernelDepth = kernel->size[1]; |
| nKernelRows = kernel->size[2]; |
| nKernelCols = kernel->size[3]; |
| nOutputPlane = nInputPlane * kernel->size[0]; |
| |
| THArgCheck((nInputDepth >= nKernelDepth |
| && nInputRows >= nKernelRows |
| && nInputCols >= nKernelCols) |
| || *vf == 'F', 2, "conv3Dger : Input image is smaller than kernel"); |
| |
| nOutputDepth = THTensor_(convsize)(nInputDepth, nKernelDepth, sdepth, vf); |
| nOutputRows = THTensor_(convsize)(nInputRows, nKernelRows, srow, vf); |
| nOutputCols = THTensor_(convsize)(nInputCols, nKernelCols, scol, vf); |
| |
| nelem = THTensor_(nElement)(r_); |
| THTensor_(resize5d)(r_,nKernelPlane, nInputPlane, nOutputDepth, nOutputRows, nOutputCols); |
| |
| if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_)) |
| { |
| THTensor_(zero)(r_); |
| } |
| else if (beta != 1) |
| THTensor_(mul)(r_, r_, beta); |
| |
| input_data = THTensor_(data)(input); |
| weight_data = THTensor_(data)(kernel); |
| output_data = THTensor_(data)(r_); |
| |
| for(k = 0; k < nKernelPlane; k++) |
| { |
| /* get kernel */ |
| real *ptr_weight = weight_data+k*kstride0; |
| |
| for(i = 0; i < nInputPlane; i++) |
| { |
| /* get input */ |
| real *ptr_input = input_data+i*istride0; |
| |
| /* do image, kernel convolution */ |
| THTensor_(conv3d)(output_data, |
| alpha, |
| ptr_input, nInputDepth, nInputRows, nInputCols, |
| ptr_weight, nKernelDepth, nKernelRows, nKernelCols, |
| sdepth, srow, scol, vf, xc); |
| |
| /* Next output plane */ |
| output_data += nOutputDepth*nOutputCols*nOutputRows; |
| } |
| } |
| THTensor_(free)(input); |
| THTensor_(free)(kernel); |
| } |
| |
| /* |
| 4D input, 5D kernel, 4D output |
| matrix vector product like |
| y <- Ax + beta*y |
| */ |
| void THTensor_(conv3Dmv)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, |
| long sdepth, long srow, long scol, const char *vf, const char *xc) |
| { |
| long nInputPlane, nInputDepth, nInputRows, nInputCols; |
| long nKernelDepth, nKernelRows, nKernelCols; |
| long nOutputPlane, nOutputDepth, nOutputRows, nOutputCols; |
| long istride0, kstride0, kstride1; |
| THTensor *input; |
| THTensor *kernel; |
| real *input_data; |
| real *weight_data; |
| real *output_data; |
| long nelem; |
| long k, i; |
| |
| THArgCheck(t_->nDimension == 4 , 3, "input: 4D Tensor expected"); |
| THArgCheck(k_->nDimension == 5 , 4, "kernel: 5D Tensor expected"); |
| THArgCheck(sdepth >= 1, 5, "Stride should be a positive integer"); |
| THArgCheck(srow >= 1, 6, "Stride should be a positive integer"); |
| THArgCheck(scol >= 1, 7, "Stride should be a positive integer"); |
| THArgCheck(*vf == 'V' || *vf == 'F', 8, "type of convolution can 'V' or 'F'"); |
| THArgCheck(*xc == 'C' || *xc == 'X', 8, "type of convolution can 'X' or 'C'"); |
| |
| input = THTensor_(newContiguous)(t_); |
| if (!(k_->stride[4] == 1) || !(k_->stride[3] == k_->size[4])) { |
| kernel = THTensor_(newContiguous)(k_); |
| } else { |
| THTensor_(retain)(k_); |
| kernel = k_; |
| } |
| |
| nInputPlane = input->size[0]; |
| istride0 = input->stride[0]; |
| nInputDepth = input->size[1]; |
| nInputRows = input->size[2]; |
| nInputCols = input->size[3]; |
| |
| kstride0 = kernel->stride[0]; |
| kstride1 = kernel->stride[1]; |
| nKernelDepth = kernel->size[2]; |
| nKernelRows = kernel->size[3]; |
| nKernelCols = kernel->size[4]; |
| nOutputPlane = kernel->size[0]; |
| THArgCheck(kernel->size[1] == nInputPlane, 2, "invalid number of input planes"); |
| |
| THArgCheck( (nInputDepth >= nKernelDepth && nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf == 'F', 2, "conv3Dmv : Input image is smaller than kernel"); |
| |
| nOutputDepth = THTensor_(convsize)(nInputDepth, nKernelDepth, sdepth, vf); |
| nOutputRows = THTensor_(convsize)(nInputRows, nKernelRows, srow, vf); |
| nOutputCols = THTensor_(convsize)(nInputCols, nKernelCols, scol, vf); |
| |
| nelem = THTensor_(nElement)(r_); |
| THTensor_(resize4d)(r_, nOutputPlane, nOutputDepth, nOutputRows, nOutputCols); |
| |
| if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_)) |
| { |
| THTensor_(zero)(r_); |
| } |
| else if (beta != 1) |
| THTensor_(mul)(r_, r_, beta); |
| |
| input_data = THTensor_(data)(input); |
| weight_data = THTensor_(data)(kernel); |
| output_data = THTensor_(data)(r_); |
| |
| for(k = 0; k < nOutputPlane; k++) |
| { |
| for(i = 0; i < nInputPlane; i++) |
| { |
| /* get kernel */ |
| real *ptr_weight = weight_data + k*kstride0 + i*kstride1; |
| /* get input */ |
| real *ptr_input = input_data + i*istride0; |
| |
| /* do image, kernel convolution */ |
| THTensor_(conv3d)(output_data, |
| alpha, |
| ptr_input, nInputDepth, nInputRows, nInputCols, |
| ptr_weight, nKernelDepth, nKernelRows, nKernelCols, |
| sdepth, srow, scol, vf, xc); |
| } |
| /* Next output plane */ |
| output_data += nOutputDepth*nOutputCols*nOutputRows; |
| } |
| THTensor_(free)(input); |
| THTensor_(free)(kernel); |
| } |
| |
| /* |
| 3D input, 3D kernel, 3D output |
| scalar multiplication like |
| y <- x*y + beta*y |
| */ |
| void THTensor_(conv3Dmul)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, |
| long sdepth, long srow, long scol, const char *vf, const char *xc) |
| { |
| THTensor *input; |
| THTensor* kernel; |
| long nInputDepth; |
| long nInputRows; |
| long nInputCols; |
| long nKernelDepth; |
| long nKernelRows; |
| long nKernelCols; |
| long nOutputDepth, nOutputRows, nOutputCols; |
| real *ptr_input; |
| real *ptr_weight; |
| real *output_data; |
| long nelem; |
| |
| THArgCheck(t_->nDimension == 3 , 3, "input: 3D Tensor expected"); |
| THArgCheck(k_->nDimension == 3 , 4, "kernel: 3D Tensor expected"); |
| THArgCheck(sdepth >= 1, 5, "Stride should be a positive integer"); |
| THArgCheck(srow >= 1, 6, "Stride should be a positive integer"); |
| THArgCheck(scol >= 1, 7, "Stride should be a positive integer"); |
| THArgCheck(*vf == 'V' || *vf == 'F', 8, "type of convolution can 'V' or 'F'"); |
| THArgCheck(*xc == 'C' || *xc == 'X', 8, "type of convolution can 'X' or 'C'"); |
| |
| input = THTensor_(newContiguous)(t_); |
| kernel = THTensor_(newContiguous)(k_); |
| |
| nInputDepth = input->size[0]; |
| nInputRows = input->size[1]; |
| nInputCols = input->size[2]; |
| nKernelDepth = kernel->size[0]; |
| nKernelRows = kernel->size[1]; |
| nKernelCols = kernel->size[2]; |
| |
| THArgCheck((nInputDepth >= nKernelDepth && nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf == 'F', 2, "conv3Dmul : Input image is smaller than kernel"); |
| |
| nOutputDepth = THTensor_(convsize)(nInputDepth, nKernelDepth, sdepth, vf); |
| nOutputRows = THTensor_(convsize)(nInputRows, nKernelRows, srow, vf); |
| nOutputCols = THTensor_(convsize)(nInputCols, nKernelCols, scol, vf); |
| |
| nelem = THTensor_(nElement)(r_); |
| THTensor_(resize3d)(r_, nOutputDepth, nOutputRows, nOutputCols); |
| if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_)) |
| THTensor_(zero)(r_); |
| else if (beta != 1) |
| THTensor_(mul)(r_, r_, beta); |
| |
| ptr_input = THTensor_(data)(input); |
| ptr_weight = THTensor_(data)(kernel); |
| output_data = THTensor_(data)(r_); |
| |
| |
| /* do image, kernel convolution */ |
| THTensor_(conv3d)(output_data, |
| alpha, |
| ptr_input, nInputDepth, nInputRows, nInputCols, |
| ptr_weight, nKernelDepth, nKernelRows, nKernelCols, |
| sdepth, srow, scol, vf, xc); |
| THTensor_(free)(input); |
| THTensor_(free)(kernel); |
| } |
| |
| /* |
| 4D input, 4D kernel, 4D output |
| component wise multiplication like |
| y <- y.*x + beta*y |
| */ |
| void THTensor_(conv3Dcmul)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, |
| long sdepth, long srow, long scol, const char *vf, const char *xc) |
| { |
| long nInputPlane, nInputDepth, nInputRows, nInputCols; |
| long nKernelDepth, nKernelRows, nKernelCols; |
| long nOutputPlane, nOutputDepth, nOutputRows, nOutputCols; |
| long istride0, kstride0; |
| |
| THTensor *input; |
| THTensor *kernel; |
| real *input_data; |
| real *weight_data; |
| real *output_data; |
| long nelem; |
| long k; |
| |
| THArgCheck(t_->nDimension == 4 , 3, "input: 3D Tensor expected"); |
| THArgCheck(k_->nDimension == 4 , 4, "kernel: 3D Tensor expected"); |
| THArgCheck(srow >= 1, 5, "Stride should be a positive integer"); |
| THArgCheck(scol >= 1, 6, "Stride should be a positive integer"); |
| THArgCheck(*vf == 'V' || *vf == 'F', 7, "type of convolution can 'V' or 'F'"); |
| THArgCheck(*xc == 'C' || *xc == 'X', 7, "type of convolution can 'X' or 'C'"); |
| |
| input = THTensor_(newContiguous)(t_); |
| kernel = THTensor_(newContiguous)(k_); |
| |
| istride0 = input->stride[0]; |
| nInputPlane = input->size[0]; |
| nInputDepth = input->size[1]; |
| nInputRows = input->size[2]; |
| nInputCols = input->size[3]; |
| |
| kstride0 = kernel->stride[0]; |
| nOutputPlane = kernel->size[0]; |
| nKernelDepth = kernel->size[1]; |
| nKernelRows = kernel->size[2]; |
| nKernelCols = kernel->size[3]; |
| |
| THArgCheck(nOutputPlane == nInputPlane, 2, "invalid number of input/kernel planes"); |
| THArgCheck( (nInputDepth >= nKernelDepth && nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf == 'F', 2, "conv3Dcmul : Input image is smaller than kernel"); |
| |
| nOutputDepth = THTensor_(convsize)(nInputDepth, nKernelDepth, sdepth, vf); |
| nOutputRows = THTensor_(convsize)(nInputRows, nKernelRows, srow, vf); |
| nOutputCols = THTensor_(convsize)(nInputCols, nKernelCols, scol, vf); |
| |
| nelem = THTensor_(nElement)(r_); |
| THTensor_(resize4d)(r_, nOutputPlane, nOutputDepth, nOutputRows, nOutputCols); |
| |
| if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_)) |
| { |
| THTensor_(zero)(r_); |
| } |
| else if (beta != 1) |
| THTensor_(mul)(r_, r_, beta); |
| |
| input_data = THTensor_(data)(input); |
| weight_data = THTensor_(data)(kernel); |
| output_data = THTensor_(data)(r_); |
| |
| for(k = 0; k < nOutputPlane; k++) |
| { |
| /* get kernel */ |
| real *ptr_weight = weight_data + k*kstride0; |
| /* get input */ |
| real *ptr_input = input_data + k*istride0; |
| |
| /* do image, kernel convolution */ |
| THTensor_(conv3d)(output_data, |
| alpha, |
| ptr_input, nInputDepth, nInputRows, nInputCols, |
| ptr_weight, nKernelDepth, nKernelRows, nKernelCols, |
| sdepth, srow, scol, vf, xc); |
| |
| /* Next output plane */ |
| output_data += nOutputDepth*nOutputCols*nOutputRows; |
| } |
| THTensor_(free)(input); |
| THTensor_(free)(kernel); |
| } |
| |
| /* |
| 4D input, 4D kernel, 4D output |
| component wise multiplication like with a permutation map |
| y <- y.*x + beta*y |
| */ |
| void THTensor_(conv3Dmap)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, THTensor *map, |
| long sdepth, long srow, long scol, const char *vf, const char *xc) |
| { |
| long nInputPlane, nInputDepth, nInputRows, nInputCols; |
| long nKernelDepth, nKernelRows, nKernelCols; |
| long nOutputPlane, nOutputDepth, nOutputRows, nOutputCols; |
| long istride0, kstride0; |
| |
| THTensor *input; |
| THTensor *kernel; |
| long nelem; |
| real *input_data; |
| real *weight_data; |
| real *output_data; |
| long nmaps; |
| long k; |
| |
| THArgCheck(t_->nDimension == 4 , 3, "input: 4D Tensor expected"); |
| THArgCheck(k_->nDimension == 4 , 4, "kernel: 4D Tensor expected"); |
| THArgCheck(map->nDimension == 2 , 4, "map: 2D Tensor expected"); |
| THArgCheck(srow >= 1, 6, "Stride should be a positive integer"); |
| THArgCheck(scol >= 1, 7, "Stride should be a positive integer"); |
| THArgCheck(*vf == 'V' || *vf == 'F', 8, "type of convolution can 'V' or 'F'"); |
| THArgCheck(*xc == 'C' || *xc == 'X', 8, "type of convolution can 'X' or 'C'"); |
| |
| input = THTensor_(newContiguous)(t_); |
| kernel = THTensor_(newContiguous)(k_); |
| |
| istride0 = input->stride[0]; |
| nInputPlane = input->size[0]; |
| nInputDepth = input->size[1]; |
| nInputRows = input->size[2]; |
| nInputCols = input->size[3]; |
| |
| kstride0 = kernel->stride[0]; |
| nOutputPlane = kernel->size[0]; |
| nKernelDepth = kernel->size[1]; |
| nKernelRows = kernel->size[2]; |
| nKernelCols = kernel->size[3]; |
| |
| THArgCheck(nOutputPlane == nInputPlane, 2, "invalid number of input/kernel planes"); |
| THArgCheck((nInputDepth >= nKernelDepth |
| && nInputRows >= nKernelRows |
| && nInputCols >= nKernelCols) || *vf == 'F', |
| 2, "conv3Dmap : Input image is smaller than kernel"); |
| |
| nOutputDepth = THTensor_(convsize)(nInputDepth, nKernelDepth, sdepth, vf); |
| nOutputRows = THTensor_(convsize)(nInputRows, nKernelRows, srow, vf); |
| nOutputCols = THTensor_(convsize)(nInputCols, nKernelCols, scol, vf); |
| |
| nelem = THTensor_(nElement)(r_); |
| THTensor_(resize4d)(r_, nOutputPlane, nOutputDepth, nOutputRows, nOutputCols); |
| |
| if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_)) |
| { |
| THTensor_(zero)(r_); |
| } |
| else if (beta != 1) |
| THTensor_(mul)(r_, r_, beta); |
| |
| input_data = THTensor_(data)(input); |
| weight_data = THTensor_(data)(kernel); |
| output_data = THTensor_(data)(r_); |
| |
| nmaps = map->size[0]; |
| |
| for(k = 0; k < nmaps; k++) |
| { |
| /* get indices */ |
| long from = (long)THTensor_(get2d)(map,k,0)-1; |
| long to = (long)THTensor_(get2d)(map,k,1)-1; |
| |
| /* get kernel */ |
| real *ptr_weight = weight_data + k*kstride0; |
| /* get input */ |
| real *ptr_input = input_data + from*istride0; |
| /* get output */ |
| real *ptr_output = output_data + to*nOutputDepth*nOutputRows*nOutputCols; |
| |
| /* do image, kernel convolution */ |
| THTensor_(conv3d)(ptr_output, |
| alpha, |
| ptr_input, nInputDepth, nInputRows, nInputCols, |
| ptr_weight, nKernelDepth, nKernelRows, nKernelCols, |
| sdepth, srow, scol, vf, xc); |
| } |
| THTensor_(free)(input); |
| THTensor_(free)(kernel); |
| } |
| |
| #endif |