// blob: 3fd27516cb84da6e1734cda40ce999572e723260 [file] [log] [blame]
#include "THCUNN.h"
#include "common.h"
#include "THCDeviceTensor.cuh"
#include "THCDeviceTensorUtils.cuh"
#include "THCDeviceUtils.cuh"
#include "THCReduceApplyUtils.cuh"
#include <THC/THCApply.cuh>
#include "THCHalf.h"
#include "THCHalfAutoNumerics.cuh"
#include "THCAtomics.cuh"
// Forward kernel: fills `output` by reading `input` at a mirrored position
// along the two innermost dimensions (index 2 = rows, index 3 = columns).
//
// Thread mapping, as used by the body:
//   - blockIdx.x / threadIdx.x enumerate the flattened (row, column) positions
//     of one output plane; surplus threads past the end do nothing.
//   - blockIdx.y is used as the index into dimension 1, blockIdx.z as the
//     index into dimension 0.
//   NOTE(review): the launch configuration lives in the host wrapper, which is
//   not visible here -- confirm the grid is sized to match.
//
// padT / padL shift the output relative to the input. With a non-negative pad
// the source index is reflected about the first/last input element (the edge
// element itself is not repeated); with a negative pad the start offsets skip
// the cropped input elements. padB / padR are accepted but never read by this
// kernel.
template<typename Dtype>
__global__ void SpatialReflectionPadding_updateOutput(
  THCDeviceTensor<Dtype, 4> input,
  THCDeviceTensor<Dtype, 4> output,
  int padT, int padB, int padL, int padR) {
  const int flatIdx = threadIdx.x + blockIdx.x * blockDim.x;
  const int outRows = output.getSize(2);
  const int outCols = output.getSize(3);

  // Only threads that correspond to a real output element do any work.
  if (flatIdx < outRows * outCols) {
    const int n = blockIdx.z;
    const int c = blockIdx.y;

    // Un-flatten the per-plane index into (row, column).
    const int dstCol = flatIdx % outCols;
    const int dstRow = flatIdx / outCols;

    const int inRows = input.getSize(2);
    const int inCols = input.getSize(3);

    // Start offsets: the input offset is non-zero only for negative padding,
    // the output offset only for positive padding.
    const int srcOffX = max(0, -padL);
    const int srcOffY = max(0, -padT);
    const int dstOffX = max(0, padL);
    const int dstOffY = max(0, padT);

    // Branch-free reflection: the two abs() terms fold coordinates that fall
    // before the first or after the last input element back into range.
    const int srcCol = abs(dstCol - padL)
                     - abs(dstCol - (inCols + padL - 1))
                     - dstCol
                     + 2 * padL + inCols - 1
                     - dstOffX + srcOffX;
    const int srcRow = abs(dstRow - padT)
                     - abs(dstRow - (inRows + padT - 1))
                     - dstRow
                     + 2 * padT + inRows - 1
                     - dstOffY + srcOffY;

    output[n][c][dstRow][dstCol] = input[n][c][srcRow][srcCol];
  }
}
// Backward kernel: for every element of `gradOutput`, computes the mirrored
// position in `gradInput` along the two innermost dimensions (index 2 = rows,
// index 3 = columns) and adds the gradient there.
//
// Thread mapping, as used by the body:
//   - blockIdx.x / threadIdx.x enumerate the flattened (row, column) positions
//     of one gradOutput plane; surplus threads past the end do nothing.
//   - blockIdx.y is used as the index into dimension 1, blockIdx.z as the
//     index into dimension 0.
//   NOTE(review): the launch configuration lives in the host wrapper, which is
//   not visible here -- confirm the grid is sized to match.
//
// Several gradOutput elements can map onto the same gradInput element (the
// reflected border and the interior share sources), so the accumulation is
// done with atomicAdd.
// NOTE(review): this kernel only adds into gradInput; presumably the caller
// zeroes it beforehand -- confirm against the host wrapper.
//
// padB / padR are accepted but never read by this kernel.
template <typename Dtype>
__global__ void SpatialReflectionPadding_updateGradInput(
  THCDeviceTensor<Dtype, 4> gradInput,
  THCDeviceTensor<Dtype, 4> gradOutput,
  int padT, int padB, int padL, int padR) {
  const int flatIdx = threadIdx.x + blockIdx.x * blockDim.x;
  const int outRows = gradOutput.getSize(2);
  const int outCols = gradOutput.getSize(3);

  // Only threads that correspond to a real gradOutput element do any work.
  if (flatIdx < outRows * outCols) {
    const int n = blockIdx.z;
    const int c = blockIdx.y;

    // Un-flatten the per-plane index into (row, column).
    const int dstCol = flatIdx % outCols;
    const int dstRow = flatIdx / outCols;

    const int inRows = gradInput.getSize(2);
    const int inCols = gradInput.getSize(3);

    // Start offsets: the input offset is non-zero only for negative padding,
    // the output offset only for positive padding.
    const int srcOffX = max(0, -padL);
    const int srcOffY = max(0, -padT);
    const int dstOffX = max(0, padL);
    const int dstOffY = max(0, padT);

    // Same branch-free reflection as the forward pass: the abs() terms fold
    // out-of-range coordinates back onto the input extent.
    const int srcCol = abs(dstCol - padL)
                     - abs(dstCol - (inCols + padL - 1))
                     - dstCol
                     + 2 * padL + inCols - 1
                     - dstOffX + srcOffX;
    const int srcRow = abs(dstRow - padT)
                     - abs(dstRow - (inRows + padT - 1))
                     - dstRow
                     + 2 * padT + inRows - 1
                     - dstOffY + srcOffY;

    const Dtype grad = gradOutput[n][c][dstRow][dstCol];
    atomicAdd(&gradInput[n][c][srcRow][srcCol], grad);
  }
}
#include "generic/SpatialReflectionPadding.cu"
#include "THCGenerateFloatTypes.h"