|  | #ifndef CAFFE2_OPERATORS_TILE_OP_H_ | 
|  | #define CAFFE2_OPERATORS_TILE_OP_H_ | 
|  |  | 
#include <array>
#include <cstdint>
#include <limits>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>
|  |  | 
|  | #include "caffe2/core/common_omp.h" | 
|  | #include "caffe2/core/context.h" | 
|  | #include "caffe2/core/logging.h" | 
|  | #include "caffe2/core/operator.h" | 
|  | #include "caffe2/utils/eigen_utils.h" | 
|  | #include "caffe2/utils/math.h" | 
|  |  | 
|  | namespace caffe2 { | 
|  |  | 
|  | // Copy a Blob n times along a specified axis. | 
|  | template <class Context> | 
|  | class TileOp final : public Operator<Context> { | 
|  | public: | 
|  | USE_OPERATOR_CONTEXT_FUNCTIONS; | 
|  |  | 
|  | template <class... Args> | 
|  | explicit TileOp(Args&&... args) | 
|  | : Operator<Context>(std::forward<Args>(args)...), | 
|  | OP_SINGLE_ARG(std::int32_t, "tiles", tiles_, 1), | 
|  | OP_SINGLE_ARG(std::int32_t, "axis", axis_, 0) {} | 
|  |  | 
|  | bool RunOnDevice() override { | 
|  | return DispatchHelper< | 
|  | TensorTypes<std::int32_t, std::int64_t, float, double>>:: | 
|  | call(this, Input(0)); | 
|  | } | 
|  |  | 
|  | template <typename T> | 
|  | bool DoRunWithType() { | 
|  | if (InputSize() > 1) { | 
|  | // We potentially have tiles and/or axis specified as inputs | 
|  | // as well. We will check for them in that order. In other words: | 
|  | // InputSize() == 2: tiles is specified | 
|  | // InputSize() == 3: tiles is specified and axis. | 
|  | // Anything specified as input will override the arguments | 
|  | CAFFE_ENFORCE( | 
|  | Input(1).dim() == 1 && Input(1).numel() == 1, | 
|  | "Input `tiles` should be a vector of size 1."); | 
|  | tiles_ = GetArgFromTensor(Input(1)); | 
|  |  | 
|  | // Because of a bug in original code, temporarily adds this part to keep | 
|  | // backward compatibility. | 
|  | // TODO(yangxm): Remove this part when prod runtime upgraded with fixed | 
|  | // model config. | 
|  | if (Input(1).template IsType<std::int64_t>()) { | 
|  | axis_ = 0; | 
|  | } | 
|  |  | 
|  | if (InputSize() > 2) { | 
|  | CAFFE_ENFORCE( | 
|  | Input(2).dim() == 1 && Input(2).numel() == 1, | 
|  | "Input `axis` should be a vector of size 1."); | 
|  | axis_ = GetArgFromTensor(Input(2)); | 
|  | } else { | 
|  | CAFFE_ENFORCE( | 
|  | OperatorBase::HasArgument("axis"), | 
|  | "Argument `axis` is missing and was not specified as input."); | 
|  | } | 
|  | } else { | 
|  | CAFFE_ENFORCE( | 
|  | OperatorBase::HasArgument("tiles"), | 
|  | "Argument `tiles` is missing and was not specified as input."); | 
|  | CAFFE_ENFORCE( | 
|  | OperatorBase::HasArgument("axis"), | 
|  | "Argument `axis` is missing and was not specified as input."); | 
|  | } | 
|  |  | 
|  | const auto& X = Input(0); | 
|  | auto* Y = Output(0); | 
|  | const int axis = X.canonical_axis_index(axis_); | 
|  |  | 
|  | // reshape output to be input tiled along the axis | 
|  | std::vector<std::int64_t> Y_dims = X.sizes().vec(); | 
|  | Y_dims[axis] *= tiles_; | 
|  | Y->Resize(Y_dims); | 
|  |  | 
|  | // size up to (and not including) axis | 
|  | const int outer_size = X.size_to_dim(axis); | 
|  | // size from axis up | 
|  | const int inner_size = X.size_from_dim(axis); | 
|  |  | 
|  | const T* X_data = X.template data<T>(); | 
|  | T* Y_data = Y->template mutable_data<T>(); | 
|  | return DoTile<T>(outer_size, inner_size, X_data, Y_data); | 
|  | } | 
|  |  | 
|  | private: | 
|  | std::int32_t GetArgFromTensor(const Tensor& tensor) { | 
|  | CAFFE_ENFORCE( | 
|  | tensor.IsType<std::int32_t>() || tensor.IsType<std::int64_t>()); | 
|  | std::int32_t val = -1; | 
|  | if (tensor.IsType<std::int32_t>()) { | 
|  | context_.template CopyToCPU<std::int32_t>( | 
|  | 1, tensor.data<std::int32_t>(), &val); | 
|  | } else if (tensor.IsType<std::int64_t>()) { | 
|  | std::int64_t val_int64; | 
|  | context_.template CopyToCPU<std::int64_t>( | 
|  | 1, tensor.data<std::int64_t>(), &val_int64); | 
|  | val = static_cast<std::int32_t>(val_int64); | 
|  | } | 
|  | return val; | 
|  | } | 
|  |  | 
|  | template <typename T> | 
|  | bool DoTile(const int outer_size, const int inner_size, const T* X, T* Y) { | 
|  | if (inner_size == 1) { | 
|  | EigenArrayMap<T> Y_arr(Y, tiles_, outer_size); | 
|  | for (const auto i : c10::irange(outer_size)) { | 
|  | Y_arr.col(i) = X[i]; | 
|  | } | 
|  | } else { | 
|  | ConstEigenArrayMap<T> X_arr(X, inner_size, outer_size); | 
|  | for (const auto i : c10::irange(outer_size)) { | 
|  | EigenArrayMap<T>(Y + i * tiles_ * inner_size, inner_size, tiles_) | 
|  | .colwise() = X_arr.col(i); | 
|  | } | 
|  | } | 
|  | return true; | 
|  | } | 
|  |  | 
|  | std::int32_t tiles_; | 
|  | std::int32_t axis_; | 
|  | }; | 
|  |  | 
|  | template <class Context> | 
|  | class TileGradientOp final : public Operator<Context> { | 
|  | public: | 
|  | USE_OPERATOR_CONTEXT_FUNCTIONS; | 
|  |  | 
|  | template <class... Args> | 
|  | explicit TileGradientOp(Args&&... args) | 
|  | : Operator<Context>(std::forward<Args>(args)...), | 
|  | OP_SINGLE_ARG(std::int32_t, "tiles", tiles_, 1), | 
|  | OP_SINGLE_ARG(std::int32_t, "axis", axis_, 0) {} | 
|  |  | 
|  | bool RunOnDevice() override { | 
|  | return DispatchHelper< | 
|  | TensorTypes<std::int32_t, std::int64_t, float, double>>:: | 
|  | call(this, Input(0)); | 
|  | } | 
|  |  | 
|  | template <typename T> | 
|  | bool DoRunWithType() { | 
|  | if (InputSize() > 1) { | 
|  | // We potentially have tiles and/or axis specified as inputs | 
|  | // as well. We will check for them in that order. In other words: | 
|  | // InputSize() == 2: tiles is specified | 
|  | // InputSize() == 3: tiles is specified and axis. | 
|  | // Anything specified as input will override the arguments | 
|  | CAFFE_ENFORCE( | 
|  | Input(1).dim() == 1 && Input(1).numel() == 1, | 
|  | "Input `tiles` should be a vector of size 1."); | 
|  | tiles_ = GetArgFromTensor(Input(1)); | 
|  | if (InputSize() > 2) { | 
|  | CAFFE_ENFORCE( | 
|  | Input(2).dim() == 1 && Input(2).numel() == 1, | 
|  | "Input `axis` should be a vector of size 1."); | 
|  | axis_ = GetArgFromTensor(Input(2)); | 
|  | } else { | 
|  | CAFFE_ENFORCE( | 
|  | OperatorBase::HasArgument("axis"), | 
|  | "Argument `axis` is missing and was not specified as input."); | 
|  | } | 
|  | } else { | 
|  | CAFFE_ENFORCE( | 
|  | OperatorBase::HasArgument("tiles"), | 
|  | "Argument `tiles` is missing and was not specified as input."); | 
|  | CAFFE_ENFORCE( | 
|  | OperatorBase::HasArgument("axis"), | 
|  | "Argument `axis` is missing and was not specified as input."); | 
|  | } | 
|  |  | 
|  | const auto& dY = Input(0); | 
|  | auto* dX = Output(0); | 
|  | const int axis = dY.canonical_axis_index(axis_); | 
|  |  | 
|  | // reshape output to be input "untiled" along the axis | 
|  | std::vector<std::int64_t> X_dims = dY.sizes().vec(); | 
|  | CAFFE_ENFORCE_EQ(X_dims[axis] % tiles_, 0); | 
|  | X_dims[axis] /= tiles_; | 
|  | dX->Resize(X_dims); | 
|  |  | 
|  | // size up to (and not including) axis | 
|  | const int outer_size = dX->size_to_dim(axis); | 
|  | // size from axis up | 
|  | const int inner_size = dX->size_from_dim(axis); | 
|  |  | 
|  | /** | 
|  | * How this works: | 
|  | * Imagine a 2D tensor (matrix) of size 3x10, tiled 2 times along axis 1 | 
|  | * (column). | 
|  | * This is equivalent to multiplying by a vector of 1s transposed. | 
|  | * The gradient of this is all 1s in the shape of the input matrix | 
|  | * (call it X). | 
|  | * So the output gradient should be the matrix multiplication result | 
|  | * of input gradient (gradient of tiled tensor output) and X. | 
|  | */ | 
|  | const T* dY_data = dY.template data<T>(); | 
|  | T* dX_data = dX->template mutable_data<T>(); | 
|  | return DoTileGradient<T>(outer_size, inner_size, dY_data, dX_data); | 
|  | } | 
|  |  | 
|  | private: | 
|  | std::int32_t GetArgFromTensor(const Tensor& tensor) { | 
|  | CAFFE_ENFORCE( | 
|  | tensor.IsType<std::int32_t>() || tensor.IsType<std::int64_t>()); | 
|  | std::int32_t val = -1; | 
|  | if (tensor.IsType<std::int32_t>()) { | 
|  | context_.template CopyToCPU<std::int32_t>( | 
|  | 1, tensor.data<std::int32_t>(), &val); | 
|  | } else if (tensor.IsType<std::int64_t>()) { | 
|  | std::int64_t val_int64; | 
|  | context_.template CopyToCPU<std::int64_t>( | 
|  | 1, tensor.data<std::int64_t>(), &val_int64); | 
|  | val = static_cast<std::int32_t>(val_int64); | 
|  | } | 
|  | return val; | 
|  | } | 
|  |  | 
|  | template <typename T> | 
|  | bool DoTileGradient( | 
|  | const int outer_size, | 
|  | const int inner_size, | 
|  | const T* dY, | 
|  | T* dX) { | 
|  | if (inner_size == 1) { | 
|  | const std::array<int, 2> dY_dims = {outer_size, tiles_}; | 
|  | const std::array<int, 2> dX_dims = {outer_size, 1}; | 
|  | math::ReduceSum<T, Context>( | 
|  | 2, dY_dims.data(), dX_dims.data(), T(1), dY, dX, &context_); | 
|  | } else { | 
|  | math::CopyMatrix<T, Context>( | 
|  | outer_size, | 
|  | inner_size, | 
|  | dY, | 
|  | inner_size * tiles_, | 
|  | dX, | 
|  | inner_size, | 
|  | &context_); | 
|  | for (const auto i : c10::irange(outer_size)) { | 
|  | const T* dY_ptr = dY + i * tiles_ * inner_size; | 
|  | T* dX_ptr = dX + i * inner_size; | 
|  | for (const auto j : c10::irange(1, tiles_)) { | 
|  | math::Add<T, Context>( | 
|  | inner_size, dX_ptr, dY_ptr + j * inner_size, dX_ptr, &context_); | 
|  | } | 
|  | } | 
|  | } | 
|  | return true; | 
|  | } | 
|  |  | 
|  | std::int32_t tiles_; | 
|  | std::int32_t axis_; | 
|  |  | 
|  | Tensor ones_; | 
|  | }; | 
|  |  | 
|  | } // namespace caffe2 | 
|  |  | 
|  | #endif // CAFFE2_OPERATORS_TILE_OP_H_ |