Add BiasCHW fallback for GPU (#7738)
diff --git a/caffe2/operators/conv_transpose_op_impl.h b/caffe2/operators/conv_transpose_op_impl.h
index 0ee509a..dd35678 100644
--- a/caffe2/operators/conv_transpose_op_impl.h
+++ b/caffe2/operators/conv_transpose_op_impl.h
@@ -102,8 +102,8 @@
// Bias term
if (InputSize() == 3) {
const T* bias_data = Input(BIAS).template data<T>();
-#if !defined(__ARM_NEON__) && !defined(__ARM_NEON)
const T* bm_data = bias_multiplier_.template data<T>();
+#if !defined(__ARM_NEON__) && !defined(__ARM_NEON)
math::Gemm<T, Context>(
CblasNoTrans,
CblasNoTrans,
@@ -119,6 +119,7 @@
#else
math::BiasCHW<T, Context>(
bias_data,
+ bm_data,
C,
output_image_size,
Ydata,
diff --git a/caffe2/utils/math.h b/caffe2/utils/math.h
index e508b37..bd0b2d6 100644
--- a/caffe2/utils/math.h
+++ b/caffe2/utils/math.h
@@ -488,6 +488,7 @@
template <typename T, class Context>
void BiasCHW(
const T* bias,
+ const T* bias_multiplier,
const int bias_channels,
const int image_size,
T* image,
diff --git a/caffe2/utils/math_cpu.cc b/caffe2/utils/math_cpu.cc
index 13d65a9..140471a 100644
--- a/caffe2/utils/math_cpu.cc
+++ b/caffe2/utils/math_cpu.cc
@@ -1675,6 +1675,7 @@
template <>
void BiasCHW<float, CPUContext>(
const float* bias,
+ const float* /*bias_multiplier*/,
const int bias_channels,
const int image_size,
float* image,
diff --git a/caffe2/utils/math_gpu.cu b/caffe2/utils/math_gpu.cu
index 3c0924a..ca05238a 100644
--- a/caffe2/utils/math_gpu.cu
+++ b/caffe2/utils/math_gpu.cu
@@ -272,6 +272,28 @@
}
template <>
+void BiasCHW<float, CUDAContext>( // CUDAContext specialization of BiasCHW; its whole body is a single Gemm call.
+ const float* bias, // forwarded to Gemm as the first matrix operand
+ const float* bias_multiplier, // forwarded to Gemm as the second matrix operand; NOTE(review): presumably image_size ones - confirm against caller
+ const int bias_channels, // forwarded to Gemm as its first dimension argument
+ const int image_size, // forwarded to Gemm as its second dimension argument
+ float* image, // buffer handed to Gemm as the destination operand
+ CUDAContext* context) { // passed through to Gemm unchanged
+ Gemm<float, CUDAContext>( // NOTE(review): labels below assume order (TransA, TransB, M, N, K, alpha, A, B, beta, C, context) - confirm against math::Gemm
+ CblasNoTrans, // first operand is not transposed
+ CblasNoTrans, // second operand is not transposed
+ bias_channels, // M (assumed)
+ image_size, // N (assumed)
+ 1, // K (assumed): inner dimension of 1 would make the product an outer product of bias and bias_multiplier
+ 1, // alpha (assumed)
+ bias, // A (assumed)
+ bias_multiplier, // B (assumed)
+ 1, // beta (assumed): a value of 1 would accumulate into image instead of overwriting it
+ image, // C (assumed)
+ context);
+}
+
+template <>
void GemmBatched<float, CUDAContext>(
const CBLAS_TRANSPOSE TransA,
const CBLAS_TRANSPOSE TransB,