Fix SpatialSubSampling (backprop was doing non-atomic writes even when kernel windows overlap, i.e. kH > dH or kW > dW).
Also make some changes to the test to make it less flaky:
- Decrease some output counts to reduce out-of-memory issues.
- Decrease some tolerances.
- Increase precision of random seed so that multiple test launches at
the same time start with different seeds.
diff --git a/SpatialSubSampling.cu b/SpatialSubSampling.cu
index 91d6beb..5d22aa4 100644
--- a/SpatialSubSampling.cu
+++ b/SpatialSubSampling.cu
@@ -342,9 +342,15 @@
dim3 threads(32,8);
// run updateGradInput kernel
- subgradinput <<<blocks, threads, 0, THCState_getCurrentStream(state)>>> (
- gradInput_data, gradOutput_data, weight_data,
- nInputPlane, nInputRows, nInputCols, kH, kW, dH, dW);
+ if (kH <= dH && kW <= dW) {
+ subgradinput <<<blocks, threads, 0, THCState_getCurrentStream(state)>>> (
+ gradInput_data, gradOutput_data, weight_data,
+ nInputPlane, nInputRows, nInputCols, kH, kW, dH, dW);
+ } else {
+ subgradinputAtomic <<<blocks, threads, 0, THCState_getCurrentStream(state)>>> (
+ gradInput_data, gradOutput_data, weight_data,
+ nInputPlane, nInputRows, nInputCols, kH, kW, dH, dW);
+ }
} else {
long nInputCols = input->size[3];
long nInputRows = input->size[2];
@@ -365,15 +371,14 @@
dim3 threads(32,8);
// run updateGradInput kernel
- if (kH == dH && kW == dW) {
+ if (kH <= dH && kW <= dW) {
subgradinput <<<blocks, threads, 0, THCState_getCurrentStream(state)>>> (
gradInput_data, gradOutput_data, weight_data,
nInputPlane, nInputRows, nInputCols, kH, kW, dH, dW);
} else {
subgradinputAtomic <<<blocks, threads, 0, THCState_getCurrentStream(state)>>> (
gradInput_data, gradOutput_data, weight_data,
- nInputPlane, nInputRows, nInputCols,
- kH, kW, dH, dW);
+ nInputPlane, nInputRows, nInputCols, kH, kW, dH, dW);
}
}