Fix CUDA GatherOp for empty batch
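Summary: Return early from the CUDA Gather operator when N <= 0 instead of launching the kernel, and extend the Python test to cover an empty index array.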
Differential Revision: D5840432
fbshipit-source-id: 5d9021f152c21d24e91dc0cc3d95443782afc228
diff --git a/caffe2/operators/utility_ops.cu b/caffe2/operators/utility_ops.cu
index 3e877fd..aed3511 100644
--- a/caffe2/operators/utility_ops.cu
+++ b/caffe2/operators/utility_ops.cu
@@ -215,6 +215,12 @@
const Index* idxs = indices.template data<Index>();
auto out = static_cast<float*>(output->raw_mutable_data(data.meta()));
+ // Return early when the input is empty: the launch below would otherwise
+ // request std::min(N, ...) == 0 blocks, and the CUDA kernel launch fails.
+ if (N <= 0) {
+ return true;
+ }
+
GatherKernel<<<
std::min(N, CAFFE_MAXIMUM_NUM_BLOCKS),
CAFFE_CUDA_NUM_THREADS,
diff --git a/caffe2/python/operator_test/gather_ops_test.py b/caffe2/python/operator_test/gather_ops_test.py
index 4fde1d3..323082b 100644
--- a/caffe2/python/operator_test/gather_ops_test.py
+++ b/caffe2/python/operator_test/gather_ops_test.py
@@ -13,17 +13,20 @@
class TestGatherOps(hu.HypothesisTestCase):
@given(rows_num=st.integers(1, 10000),
- index_num=st.integers(1, 5000),
+ index_num=st.integers(0, 5000),
**hu.gcs)
def test_gather_ops(self, rows_num, index_num, gc, dc):
data = np.random.random((rows_num, 10, 20)).astype(np.float32)
- ind = np.random.randint(rows_num, size=(index_num, 1)).astype('int32')
+ ind = np.random.randint(rows_num, size=(index_num, )).astype('int32')
op = core.CreateOperator(
'Gather',
['data', 'ind'],
['output'])
def ref_gather(data, ind):
+ if ind.size == 0:
+ return [np.zeros((0, 10, 20)).astype(np.float32)]
+
output = [r for r in [data[i] for i in ind]]
return [output]