blob: a960d29cf0a73fa28675bea139a571faeda03b8f [file] [log] [blame]
#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/VolumetricReplicationPadding.c"
#else
/*
 * Validates the shapes for volumetric replication padding.
 *
 * input must be a 4D (slices, depth, height, width) tensor or a 5D
 * (batch, slices, depth, height, width) tensor.  Pads may be negative
 * (cropping), but every computed output extent must stay >= 1.
 *
 * gradOutput may be NULL (forward pass); when non-NULL (backward pass)
 * its slice/depth/height/width extents must match the computed output.
 */
static inline void THNN_(VolumetricReplicationPadding_shapeCheck)(
    THNNState *state,
    THTensor *input,
    THTensor *gradOutput,
    int pleft, int pright,
    int ptop, int pbottom,
    int pfront, int pback) {
  int dimw = 3;
  int dimh = 2;
  int dimd = 1;
  int dimslices = 0;
  int64_t nslices;
  int64_t idepth;
  int64_t iheight;
  int64_t iwidth;
  int64_t odepth;
  int64_t oheight;
  int64_t owidth;

  THNN_ARGCHECK(input->nDimension == 4 || input->nDimension == 5, 2, input,
                "4D or 5D (batch mode) tensor expected for input, but got: %s");

  /* In batch mode every dimension index shifts up by one. */
  if (input->nDimension == 5)
  {
    dimw++;
    dimh++;
    dimd++;
    dimslices++;
  }

  /* sizes */
  nslices = input->size[dimslices];
  idepth = input->size[dimd];
  iheight = input->size[dimh];
  iwidth = input->size[dimw];
  odepth = idepth + pfront + pback;
  oheight = iheight + ptop + pbottom;
  owidth = iwidth + pleft + pright;

  /* BUG FIX: all three output extents must be >= 1, so the conditions are
     joined with && — the original || let a single valid extent mask two
     invalid (<= 0) ones.
     NOTE(review): the %d specifiers receive int64_t arguments here, as in
     the rest of this file — left untouched to keep the change minimal. */
  THArgCheck(owidth >= 1 && oheight >= 1 && odepth >= 1, 2,
             "input (D: %d H: %d, W: %d) is too small."
             " Calculated output D: %d H: %d W: %d",
             idepth, iheight, iwidth, odepth, oheight, owidth);

  if (gradOutput != NULL) {
    /* BUG FIX: this check is on the slice (feature) dimension; the message
       previously said "width" (copy-paste from the check below). */
    THArgCheck(nslices == THTensor_(size)(gradOutput, dimslices), 3,
               "gradOutput nslices unexpected. Expected: %d, Got: %d",
               nslices, THTensor_(size)(gradOutput, dimslices));
    THArgCheck(owidth == THTensor_(size)(gradOutput, dimw), 3,
               "gradOutput width unexpected. Expected: %d, Got: %d",
               owidth, THTensor_(size)(gradOutput, dimw));
    THArgCheck(oheight == THTensor_(size)(gradOutput, dimh), 3,
               "gradOutput height unexpected. Expected: %d, Got: %d",
               oheight, THTensor_(size)(gradOutput, dimh));
    THArgCheck(odepth == THTensor_(size)(gradOutput, dimd), 3,
               "gradOutput depth unexpected. Expected: %d, Got: %d",
               odepth, THTensor_(size)(gradOutput, dimd));
  }
}
static void THNN_(VolumetricReplicationPadding_updateOutput_frame)(
real *input_p, real *output_p,
int64_t nslices,
int64_t iwidth, int64_t iheight, int64_t idepth,
int64_t owidth, int64_t oheight, int64_t odepth,
int pleft, int pright,
int ptop, int pbottom,
int pfront, int pback)
{
int iStartX = fmax(0, -pleft);
int iStartY = fmax(0, -ptop);
int iStartZ = fmax(0, -pfront);
int oStartX = fmax(0, pleft);
int oStartY = fmax(0, ptop);
int oStartZ = fmax(0, pfront);
int64_t k, ip_x, ip_y, ip_z;
#pragma omp parallel for private(k, ip_x, ip_y, ip_z)
for (k = 0; k < nslices; k++) {
int64_t i, j, z;
for (z = 0; z < odepth; z++) {
for (i = 0; i < oheight; i++) {
for (j = 0; j < owidth; j++) {
if (j < pleft) {
ip_x = pleft;
} else if (j >= pleft && j < iwidth + pleft) {
ip_x = j;
} else {
ip_x = iwidth + pleft - 1;
}
ip_x = ip_x - oStartX + iStartX;
if (i < ptop) {
ip_y = ptop;
} else if (i >= ptop && i < iheight + ptop) {
ip_y = i;
} else {
ip_y = iheight + ptop - 1;
}
ip_y = ip_y - oStartY + iStartY;
if (z < pfront) {
ip_z = pfront;
} else if (z >= pfront && z < idepth + pfront) {
ip_z = z;
} else {
ip_z = idepth + pfront - 1;
}
ip_z = ip_z - oStartZ + iStartZ;
real *dest_p = output_p + k * owidth * oheight * odepth +
z * owidth * oheight + i * owidth + j;
real *src_p = input_p + k * iwidth * iheight * idepth +
ip_z * iwidth * iheight + ip_y * iwidth + ip_x;
*dest_p = *src_p;
}
}
}
}
}
/*
 * Forward pass: pads `input` by replicating border values and writes the
 * result to `output`.  Accepts a 4D (slices, D, H, W) tensor or a 5D
 * (batch, slices, D, H, W) tensor; in the batched case the per-sample
 * work is distributed across OpenMP threads.
 */
void THNN_(VolumetricReplicationPadding_updateOutput)(THNNState *state,
                                                      THTensor *input,
                                                      THTensor *output,
                                                      int pleft, int pright,
                                                      int ptop, int pbottom,
                                                      int pfront, int pback)
{
  /* Validate before reading any sizes (gradOutput not applicable here). */
  THNN_(VolumetricReplicationPadding_shapeCheck)(
      state, input, NULL, pleft, pright,
      ptop, pbottom, pfront, pback);

  /* Dimension indices shift up by one in batch (5D) mode. */
  int batched = (input->nDimension == 5);
  int dimslices = batched ? 1 : 0;
  int dimd = dimslices + 1;
  int dimh = dimslices + 2;
  int dimw = dimslices + 3;
  int64_t nbatch = batched ? input->size[0] : 1;

  /* Input extents and the padded output extents. */
  int64_t nslices = input->size[dimslices];
  int64_t idepth  = input->size[dimd];
  int64_t iheight = input->size[dimh];
  int64_t iwidth  = input->size[dimw];
  int64_t odepth  = idepth  + pfront + pback;
  int64_t oheight = iheight + ptop   + pbottom;
  int64_t owidth  = iwidth  + pleft  + pright;

  /* The frame kernel assumes contiguous storage. */
  input = THTensor_(newContiguous)(input);

  if (!batched)
  {
    THTensor_(resize4d)(output, nslices, odepth, oheight, owidth);
    THNN_(VolumetricReplicationPadding_updateOutput_frame)(
        THTensor_(data)(input),
        THTensor_(data)(output),
        nslices,
        iwidth, iheight, idepth,
        owidth, oheight, odepth,
        pleft, pright,
        ptop, pbottom,
        pfront, pback);
  }
  else
  {
    THTensor_(resize5d)(output, nbatch, nslices, odepth, oheight, owidth);
    real *input_data = THTensor_(data)(input);
    real *output_data = THTensor_(data)(output);
    int64_t p;
#pragma omp parallel for private(p)
    for (p = 0; p < nbatch; p++)
    {
      THNN_(VolumetricReplicationPadding_updateOutput_frame)(
          input_data + p * nslices * iwidth * iheight * idepth,
          output_data + p * nslices * owidth * oheight * odepth,
          nslices,
          iwidth, iheight, idepth,
          owidth, oheight, odepth,
          pleft, pright,
          ptop, pbottom,
          pfront, pback);
    }
  }

  /* Release the contiguous copy (no-op refcount drop if already contiguous). */
  THTensor_(free)(input);
}
/*
 * Backward counterpart of updateOutput_frame for one sample: every
 * gradOutput element is routed back to the input element whose value was
 * replicated into it, and ACCUMULATED there (border input cells receive
 * contributions from all pad cells that replicated them).
 *
 * ginput_p must be zero-initialized by the caller; pright/pbottom/pback
 * are unused here — they already determined the o* extents at the call
 * site.  Parallelizing over k (slices) is safe: slices never share
 * gradInput cells.
 */
static void THNN_(VolumetricReplicationPadding_updateGradInput_frame)(
    real *ginput_p, real *goutput_p,
    int64_t nslices,
    int64_t iwidth, int64_t iheight, int64_t idepth,
    int64_t owidth, int64_t oheight, int64_t odepth,
    int pleft, int pright,
    int ptop, int pbottom,
    int pfront, int pback)
{
  /* Integer max(0, x) instead of fmax(): the operands are ints, and
     routing them through double is an unnecessary conversion and a
     needless math-library dependency.  Results are identical. */
  int iStartX = (pleft  < 0) ? -pleft  : 0;
  int iStartY = (ptop   < 0) ? -ptop   : 0;
  int iStartZ = (pfront < 0) ? -pfront : 0;
  int oStartX = (pleft  > 0) ? pleft   : 0;
  int oStartY = (ptop   > 0) ? ptop    : 0;
  int oStartZ = (pfront > 0) ? pfront  : 0;

  int64_t k, ip_x, ip_y, ip_z;
#pragma omp parallel for private(k, ip_x, ip_y, ip_z)
  for (k = 0; k < nslices; k++) {
    int64_t i, j, z;
    for (z = 0; z < odepth; z++) {
      for (i = 0; i < oheight; i++) {
        for (j = 0; j < owidth; j++) {
          /* Same clamping as the forward pass: find the input coordinate
             that was replicated into output position (z, i, j). */
          if (j < pleft) {
            ip_x = pleft;
          } else if (j >= pleft && j < iwidth + pleft) {
            ip_x = j;
          } else {
            ip_x = iwidth + pleft - 1;
          }
          ip_x = ip_x - oStartX + iStartX;

          if (i < ptop) {
            ip_y = ptop;
          } else if (i >= ptop && i < iheight + ptop) {
            ip_y = i;
          } else {
            ip_y = iheight + ptop - 1;
          }
          ip_y = ip_y - oStartY + iStartY;

          if (z < pfront) {
            ip_z = pfront;
          } else if (z >= pfront && z < idepth + pfront) {
            ip_z = z;
          } else {
            ip_z = idepth + pfront - 1;
          }
          ip_z = ip_z - oStartZ + iStartZ;

          real *src_p = goutput_p + k * owidth * oheight * odepth +
              z * owidth * oheight + i * owidth + j;
          real *dest_p = ginput_p + k * iwidth * iheight * idepth +
              ip_z * iwidth * iheight + ip_y * iwidth + ip_x;
          /* Accumulate — multiple output cells may map to one input cell. */
          *dest_p += *src_p;
        }
      }
    }
  }
}
/*
 * Backward pass: accumulates gradOutput into gradInput, reversing the
 * replication performed by updateOutput.  gradInput is resized to match
 * input and zeroed before accumulation.  Handles 4D input directly and
 * 5D input with an OpenMP loop over the batch.
 */
void THNN_(VolumetricReplicationPadding_updateGradInput)(THNNState *state,
                                                         THTensor *input,
                                                         THTensor *gradOutput,
                                                         THTensor *gradInput,
                                                         int pleft, int pright,
                                                         int ptop, int pbottom,
                                                         int pfront, int pback)
{
  int dimw = 3;
  int dimh = 2;
  int dimd = 1;
  int dimslices = 0;
  int64_t nbatch = 1;
  int64_t nslices;
  int64_t idepth;
  int64_t iheight;
  int64_t iwidth;
  int64_t odepth;
  int64_t oheight;
  int64_t owidth;

  /* BUG FIX: run the shape check FIRST and pass gradOutput — the original
     passed NULL, so shapeCheck's gradOutput-dimension checks never ran for
     the backward pass, and it read input->size[] before nDimension was
     validated. */
  THNN_(VolumetricReplicationPadding_shapeCheck)(
      state, input, gradOutput, pleft, pright,
      ptop, pbottom, pfront, pback);

  /* Dimension indices shift up by one in batch (5D) mode. */
  if (input->nDimension == 5)
  {
    nbatch = input->size[0];
    dimw++;
    dimh++;
    dimd++;
    dimslices++;
  }

  /* sizes */
  nslices = input->size[dimslices];
  idepth = input->size[dimd];
  iheight = input->size[dimh];
  iwidth = input->size[dimw];
  odepth = idepth + pfront + pback;
  oheight = iheight + ptop + pbottom;
  owidth = iwidth + pleft + pright;

  /* get contiguous gradOutput */
  gradOutput = THTensor_(newContiguous)(gradOutput);

  /* resize and zero — the frame kernel accumulates with += */
  THTensor_(resizeAs)(gradInput, input);
  THTensor_(zero)(gradInput);

  /* backprop */
  if (input->nDimension == 4) {
    THNN_(VolumetricReplicationPadding_updateGradInput_frame)(
        THTensor_(data)(gradInput),
        THTensor_(data)(gradOutput),
        nslices,
        iwidth, iheight, idepth,
        owidth, oheight, odepth,
        pleft, pright,
        ptop, pbottom,
        pfront, pback);
  } else {
    int64_t p;
#pragma omp parallel for private(p)
    for (p = 0; p < nbatch; p++) {
      THNN_(VolumetricReplicationPadding_updateGradInput_frame)(
          THTensor_(data)(gradInput) + p * nslices * idepth * iheight * iwidth,
          THTensor_(data)(gradOutput) + p * nslices * odepth * oheight * owidth,
          nslices,
          iwidth, iheight, idepth,
          owidth, oheight, odepth,
          pleft, pright,
          ptop, pbottom,
          pfront, pback);
    }
  }

  /* cleanup */
  THTensor_(free)(gradOutput);
}
#endif