/**
* Copyright (c) 2016-present, Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef CAFFE2_OPERATORS_ELEMENTWISE_OP_H_
#define CAFFE2_OPERATORS_ELEMENTWISE_OP_H_
#include "caffe2/core/common_omp.h"
#include "caffe2/core/context.h"
#include "caffe2/core/logging.h"
#include "caffe2/core/operator.h"
#include "caffe2/core/tensor.h"
#include "caffe2/utils/math.h"
namespace caffe2 {
using NumericTypes = TensorTypes<int32_t, int64_t, float, double>;
using IntTypes = TensorTypes<int32_t, int64_t>;
using BoolTypes = TensorTypes<bool>;
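// Convenience type lists used with DispatchHelper for runtime type dispatch.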
struct SameTypeAsInput {
template <typename T>
using type = T;
};
template <typename R>
struct FixedType {
template <typename T>
using type = R;
};
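// Illustrative example: SameTypeAsInput maps each input type T to itself,
// while FixedType<R> maps every input type to R (comparison operators, for
// instance, can use FixedType<bool> to always produce bool outputs):
//
//   static_assert(
//       std::is_same<SameTypeAsInput::type<float>, float>::value, "");
//   static_assert(
//       std::is_same<FixedType<bool>::type<float>, bool>::value, "");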
template <
typename InputTypes,
class Context,
class Functor,
class TypeMap = SameTypeAsInput>
class UnaryElementwiseWithArgsOp : public Operator<Context> {
public:
USE_OPERATOR_CONTEXT_FUNCTIONS;
UnaryElementwiseWithArgsOp(const OperatorDef& operator_def, Workspace* ws)
: Operator<Context>(operator_def, ws), functor_(*this) {}
bool RunOnDevice() override {
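// Dispatch to DoRunWithType<T> based on the runtime element type of Input(0).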
return DispatchHelper<InputTypes>::call(this, Input(0));
}
template <typename T>
bool DoRunWithType() {
auto& input = Input(0);
auto* output = Output(0);
output->ResizeLike(input);
using R = typename TypeMap::template type<T>;
functor_(
input.size(),
input.template data<T>(),
output->template mutable_data<R>(),
&context_);
return true;
}
private:
Functor functor_;
};
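// Illustrative sketch of a functor suitable for UnaryElementwiseWithArgsOp;
// the name "ScaleFunctor" and the "scale" argument are hypothetical, not part
// of Caffe2:
//
//   struct ScaleFunctor {
//     explicit ScaleFunctor(OperatorBase& op)
//         : scale_(op.GetSingleArgument<float>("scale", 1.0f)) {}
//     template <typename T, typename R, typename Context>
//     void operator()(int n, const T* in, R* out, Context*) {
//       for (int i = 0; i < n; ++i) {
//         out[i] = in[i] * scale_;
//       }
//     }
//     float scale_;
//   };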
/**
 * WithDefaultConstructor is a functor that can be used as the functor of an
 * UnaryElementwiseWithArgsOp. It simply forwards the operator() call to
 * another functor that is default-constructible, i.e. one whose constructor
 * does not need to read any operator arguments.
 */
template <typename Functor>
struct WithDefaultConstructor {
explicit WithDefaultConstructor(OperatorBase& /*op*/) {}
template <typename In, typename Out, typename Context>
void operator()(int n, const In* in, Out* out, Context* c) {
Functor()(n, in, out, c);
}
};
/**
 * UnaryElementwiseOp is a wrapper around UnaryElementwiseWithArgsOp, with the
 * difference that it takes a default-constructible functor, i.e. one that
 * does not need to inspect any operator arguments at creation time.
 */
template <
typename InputTypes,
class Context,
class Functor,
class OutputType = SameTypeAsInput>
using UnaryElementwiseOp = UnaryElementwiseWithArgsOp<
InputTypes,
Context,
WithDefaultConstructor<Functor>,
OutputType>;
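// Illustrative sketch of defining an operator via UnaryElementwiseOp; the
// names "NegateFunctor" and "Negate" are hypothetical, not part of Caffe2:
//
//   struct NegateFunctor {
//     template <typename T>
//     inline void operator()(const int n, const T* x, T* y, CPUContext*) {
//       for (int i = 0; i < n; ++i) {
//         y[i] = -x[i];
//       }
//     }
//   };
//   REGISTER_CPU_OPERATOR(
//       Negate, UnaryElementwiseOp<NumericTypes, CPUContext, NegateFunctor>);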
/**
 * Performs a binary operation (e.g. +, - or /) with optional broadcast
 * support.
 *
 * Functor specifies the actual operation to be performed.
 *
 * If the "broadcast" argument is 0 (the default), both tensors must be of
 * exactly the same shape.
 *
 * If "broadcast" is 1, limited broadcasting of the right-hand-side argument
 * to match the shape of the left-hand-side argument is supported. Only suffix
 * matching is supported for now; 1-dim expansion doesn't work yet. More
 * precisely, tensors A and B can be operated on iff
 * `shape(A)[-len(shape(B)):] == shape(B)`. An explicit "axis" argument shifts
 * the match, requiring `shape(A)[axis:axis + len(shape(B))] == shape(B)`
 * instead.
 */
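// Example (illustrative): with broadcast = 1 and the default axis, A of shape
// (2, 3, 4) and B of shape (3, 4) satisfy the suffix rule, so
// C[i][j][k] = A[i][j][k] op B[j][k]; a B of shape (4, 3) would be rejected.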
template <
typename InputTypes,
class Context,
class Functor,
class TypeMap = SameTypeAsInput>
class BinaryElementwiseOp : public Operator<Context> {
public:
USE_OPERATOR_CONTEXT_FUNCTIONS;
BinaryElementwiseOp(const OperatorDef& operator_def, Workspace* ws)
: Operator<Context>(operator_def, ws),
OP_SINGLE_ARG(bool, "broadcast", enable_broadcast_, 0),
OP_SINGLE_ARG(int, "axis", axis_, -1),
OP_SINGLE_ARG(string, "axis_str", axis_str_, ""),
OP_SINGLE_ARG(string, "order", order_, "NCHW"),
functor_() {
// Figure out the correct axis to use.
if (enable_broadcast_) {
if (axis_ != -1) {
// Get axis from an explicit axis argument.
CAFFE_ENFORCE_EQ(
axis_str_.size(),
0,
"Args axis and axis_str cannot be used simultaneously.");
} else if (axis_str_.size()) {
// Get the axis index semantically.
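// E.g. axis_str = "C" with order = "NCHW" resolves to axis = 1.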
CAFFE_ENFORCE_EQ(
axis_str_.size(), 1, "Unsupported axis string ", axis_str_);
size_t semantic_axis = order_.find(axis_str_);
CAFFE_ENFORCE_NE(
semantic_axis,
string::npos,
"Unrecognizable axis string ",
axis_str_,
" from order string ",
order_);
axis_ = semantic_axis;
}
} else {
CAFFE_ENFORCE(
axis_ == -1 && axis_str_.size() == 0,
"Do not specify axis or axis_str if broadcast is not enabled.");
}
}
bool RunOnDevice() override {
return DispatchHelper<InputTypes>::call(this, Input(0));
}
template <typename T>
bool DoRunWithType() {
const auto& A = Input(0);
const auto& B = Input(1);
auto* C = Output(0);
CAFFE_ENFORCE(
&B != C || !enable_broadcast_,
"In-place is allowed only with the first tensor when broadcasting");
C->ResizeLike(A);
const T* Adata = A.template data<T>();
const T* Bdata = B.template data<T>();
auto* Cdata =
C->template mutable_data<typename TypeMap::template type<T>>();
if (!enable_broadcast_) {
CAFFE_ENFORCE_EQ(
A.dims(),
B.dims(),
"Dimension mismatch - did you forget to set broadcast=1?");
functor_.template Run<false>(A.size(), Adata, Bdata, Cdata, &context_);
} else if (B.size() == 1) {
functor_.template Run<true>(A.size(), Adata, Bdata, Cdata, &context_);
} else {
CAFFE_ENFORCE_GT(
A.ndim(),
B.ndim(),
"If you are doing broadcasting, input1 should have "
"a smaller number of dimensions.");
const int axis = (axis_ == -1 ? A.ndim() - B.ndim() : axis_);
CAFFE_ENFORCE(
axis >= 0 && axis < A.ndim(),
"Broadcast axis should be in the range of the number "
"of dimensions of the first input.");
size_t pre = 1, n = 1, post = 1;
for (int i = 0; i < axis; ++i) {
pre *= A.dim(i);
}
for (int i = 0; i < B.ndim(); ++i) {
CAFFE_ENFORCE_EQ(
A.dim(i + axis), B.dim(i), "Broadcast dimension mismatch.");
n *= B.dim(i);
}
for (int i = axis + B.ndim(); i < A.ndim(); ++i) {
post *= A.dim(i);
}
if (post == 1) {
functor_.RunWithBroadcast(Adata, Bdata, Cdata, pre, n, &context_);
} else {
functor_.RunWithBroadcast2(
Adata, Bdata, Cdata, pre, n, post, &context_);
}
}
return true;
}
private:
bool enable_broadcast_;
int axis_;
string axis_str_;
string order_;
Functor functor_;
};
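// WithoutBroadcast adapts a plain elementwise functor for use as the functor
// of a BinaryElementwiseOp that does not support broadcasting: the scalar and
// broadcast code paths fail loudly instead of silently computing a wrong
// result.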
template <typename Functor>
struct WithoutBroadcast {
template <bool b_is_scalar, typename T, typename R, typename Context>
inline void Run(size_t n, const T* a, const T* b, R* out, Context* c) {
if (b_is_scalar) {
CAFFE_THROW("Broadcast not supported.");
} else {
Functor().Run(n, a, b, out, c);
}
}
template <typename T, typename R, typename Context>
inline void RunWithBroadcast(
const T* /*a*/,
const T* /*b*/,
R* /*out*/,
size_t /*pre*/,
size_t /*n*/,
Context*) {
CAFFE_NOT_IMPLEMENTED;
}
template <typename T, typename R, typename Context>
inline void RunWithBroadcast2(
const T* /*a*/,
const T* /*b*/,
R* /*out*/,
size_t /*pre*/,
size_t /*n*/,
size_t /*post*/,
Context*) {
CAFFE_NOT_IMPLEMENTED;
}
};
// Gradient operator for elementwise division.
template <class Context>
class DivGradientOp final : public Operator<Context> {
public:
USE_SIMPLE_CTOR_DTOR(DivGradientOp);
USE_OPERATOR_CONTEXT_FUNCTIONS;
bool RunOnDevice() override;
};
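// Helpers used by SumReduceLikeOp ("SRL" = sum-reduce-like), presumably for
// summing a buffer down to one element or across leading and/or trailing
// dimensions on the CPU (definitions live outside this header).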
namespace SRLHelper {
template <typename T>
void sum2one(const T* a, T* y, size_t n);
template <typename T>
void RunWithBroadcastFront(const T* a, T* y, size_t pre, size_t n, CPUContext*);
template <typename T>
void RunWithBroadcastBack(const T* a, T* y, size_t post, size_t n, CPUContext*);
template <typename T>
void RunWithBroadcast2(
const T* a,
T* y,
size_t pre,
size_t n,
size_t post,
CPUContext*);
} // namespace SRLHelper
// Sum reduction operator that is used for computing the gradient in cases
// where the forward op is in broadcast mode.
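// Example (illustrative): if the forward op broadcast B of shape (3, 4)
// against A of shape (2, 3, 4), the gradient w.r.t. B is the output gradient
// summed over the leading dimension, reduced back down to shape (3, 4).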
template <class Context>
class SumReduceLikeOp final : public Operator<Context> {
public:
USE_OPERATOR_CONTEXT_FUNCTIONS;
SumReduceLikeOp(const OperatorDef& operator_def, Workspace* ws)
: Operator<Context>(operator_def, ws),
OP_SINGLE_ARG(int, "axis", axis_, -1),
OP_SINGLE_ARG(string, "axis_str", axis_str_, ""),
OP_SINGLE_ARG(string, "order", order_, "NCHW") {
if (axis_ != -1) {
// Get axis from an explicit axis argument.
CAFFE_ENFORCE_EQ(
axis_str_.size(),
0,
"Args axis and axis_str cannot be used simultaneously.");
} else if (axis_str_.size()) {
// Get the axis index semantically.
CAFFE_ENFORCE_EQ(
axis_str_.size(), 1, "Unsupported axis string ", axis_str_);
size_t semantic_axis = order_.find(axis_str_);
CAFFE_ENFORCE_NE(
semantic_axis,
string::npos,
"Unrecognizable axis string ",
axis_str_,
" from order string ",
order_);
axis_ = semantic_axis;
}
}
bool RunOnDevice() override {
return DispatchHelper<TensorTypes<float, double>>::call(this, Input(0));
}
template <typename T>
bool DoRunWithType();
private:
int axis_;
string axis_str_;
string order_;
Tensor<Context> ones_;
Tensor<Context> sum_buffer_;
};
template <class Context>
bool DivGradientOp<Context>::RunOnDevice() {
auto& Y = Input(0);
auto& Z = Input(1);
auto& dZ = Input(2);
auto* dX = Output(0);
auto* dY = Output(1);
CAFFE_ENFORCE_GT(Y.size(), 0);
CAFFE_ENFORCE_GT(Z.size(), 0);
dX->ResizeLike(Y);
dY->ResizeLike(Y);
const float* Ydata = Y.template data<float>();
const float* Zdata = Z.template data<float>();
const float* dZdata = dZ.template data<float>();
float* dXdata = dX->template mutable_data<float>();
float* dYdata = dY->template mutable_data<float>();
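// For the forward op Z = X / Y, the chain rule gives dX = dZ / Y and
// dY = -dZ * Z / Y; ElementWiseDivide is expected to compute both at once.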
ElementWiseDivide(context_, Y.size(), dXdata, dYdata, dZdata, Ydata, Zdata);
return true;
}
// For arithmetic operators, Eigen provides a good way to vectorize even
// when broadcasting.
#define EIGEN_FUNCTOR(name, eigen_op, input_type, output_type) \
struct Eigen##name##Functor { \
template <int b_is_scalar, typename T, typename R> \
inline void Run(size_t n, const T* a, const T* b, R* out, CPUContext*) { \
if (b_is_scalar) { \
EigenVectorArrayMap<R>(out, n) = \
eigen_op((ConstEigenVectorArrayMap<T>(a, n)), (b[0])); \
} else { \
EigenVectorArrayMap<R>(out, n) = eigen_op( \
(ConstEigenVectorArrayMap<T>(a, n)), \
(ConstEigenVectorArrayMap<T>(b, n))); \
} \
} \
template <typename T, typename R> \
void RunWithBroadcast( \
const T* a, \
const T* b, \
R* out, \
size_t pre, \
size_t n, \
CPUContext*) { \
EigenArrayMap<R>(out, n, pre) = eigen_op( \
(ConstEigenArrayMap<T>(a, n, pre).colwise()), \
(ConstEigenVectorArrayMap<T>(b, n))); \
} \
template <typename T, typename R> \
void RunWithBroadcast2( \
const T* a, \
const T* b, \
R* out, \
size_t pre, \
size_t n, \
size_t post, \
CPUContext*) { \
for (int i = 0; i < pre; ++i) { \
EigenArrayMap<R>(out + i * n * post, post, n) = eigen_op( \
(ConstEigenArrayMap<T>(a + i * n * post, post, n).rowwise()), \
(Eigen::Map<const Eigen::Array<T, 1, Eigen::Dynamic>>(b, n))); \
} \
} \
}; \
REGISTER_CPU_OPERATOR( \
name, \
BinaryElementwiseOp< \
input_type, \
CPUContext, \
Eigen##name##Functor, \
output_type>)
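// Illustrative usage (EIGEN_ADD shown here is an example elementwise macro,
// defined by the caller):
//
//   #define EIGEN_ADD(x, y) ((x) + (y))
//   EIGEN_FUNCTOR(Add, EIGEN_ADD, NumericTypes, SameTypeAsInput);
//
// This expands to an EigenAddFunctor and registers the CPU "Add" operator,
// with both broadcast paths vectorized by Eigen.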
} // namespace caffe2
#endif // CAFFE2_OPERATORS_ELEMENTWISE_OP_H_