|  | #ifndef CAFFE2_OPERATORS_SEQUENCE_OPS_H_ | 
|  | #define CAFFE2_OPERATORS_SEQUENCE_OPS_H_ | 
|  |  | 
|  | #include "caffe2/core/operator.h" | 
|  | #include "caffe2/core/tensor.h" | 
|  | #include "caffe2/utils/math.h" | 
|  |  | 
|  | namespace caffe2 { | 
|  |  | 
|  | template <class Context> | 
|  | class GatherPaddingOp final : public Operator<Context> { | 
|  | public: | 
|  | USE_OPERATOR_CONTEXT_FUNCTIONS; | 
|  | template <class... Args> | 
|  | explicit GatherPaddingOp(Args&&... args) | 
|  | : Operator<Context>(std::forward<Args>(args)...), | 
|  | startPaddingWidth_( | 
|  | this->template GetSingleArgument<int>("padding_width", 1)), | 
|  | endPaddingWidth_( | 
|  | this->template GetSingleArgument<int>("end_padding_width", -1)) { | 
|  | CAFFE_ENFORCE_GE(startPaddingWidth_, 0); | 
|  | if (endPaddingWidth_ < 0) { | 
|  | endPaddingWidth_ = startPaddingWidth_; | 
|  | } | 
|  | } | 
|  |  | 
|  | bool RunOnDevice() override { | 
|  | if (startPaddingWidth_ == 0 && endPaddingWidth_ == 0) { | 
|  | Output(0)->Resize(std::vector<int64_t>(0)); | 
|  | auto output_0_data = Output(0)->template mutable_data<int64_t>(); | 
|  | // TODO(zhengxq): as suggested by salex@, change this to a loop. | 
|  | math::Set<int64_t, Context>( | 
|  | Output(0)->numel(), 0, output_0_data, &context_); | 
|  | if (OutputSize() == 2) { | 
|  | Output(1)->Resize(std::vector<int64_t>(0)); | 
|  | auto output_1_data = Output(1)->template mutable_data<int64_t>(); | 
|  | math::Set<int64_t, Context>( | 
|  | Output(1)->numel(), 0, output_1_data, &context_); | 
|  | } | 
|  | return true; | 
|  | } | 
|  | return DispatchHelper<TensorTypes<float, double, int, int64_t, bool>>::call( | 
|  | this, Input(0)); | 
|  | } | 
|  |  | 
|  | template <typename T> | 
|  | bool DoRunWithType() { | 
|  | const auto& in = Input(0); | 
|  | CAFFE_ENFORCE_GE(in.dim(), 1); | 
|  | const int32_t outer_size = in.sizes()[0]; | 
|  | const auto block_size = in.size_from_dim(1); | 
|  | const auto pad_width = startPaddingWidth_ + endPaddingWidth_; | 
|  |  | 
|  | // if no lengths is provided, assume it is a single full-span entry | 
|  | const int32_t* lengths_ptr = &outer_size; | 
|  | int64_t lengths_size = 1; | 
|  | if (InputSize() > 1) { | 
|  | const auto& lengths = Input(1); | 
|  | lengths_ptr = lengths.template data<int32_t>(); | 
|  | lengths_size = lengths.numel(); | 
|  | } | 
|  | std::vector<int64_t> padShape(in.sizes().begin() + 1, in.sizes().end()); | 
|  | // output will contain accumulator over paddings | 
|  | Output(0)->Resize(padShape); | 
|  | T* padding_start_ptr = Output(0)->template mutable_data<T>(); | 
|  | math::Set<T, Context>(block_size, 0.0, padding_start_ptr, &context_); | 
|  |  | 
|  | // if no end_padding is provided, assume it's the same as start_padding | 
|  | T* padding_end_ptr = padding_start_ptr; | 
|  | if (OutputSize() == 2) { | 
|  | Output(1)->Resize(padShape); | 
|  | padding_end_ptr = Output(1)->template mutable_data<T>(); | 
|  | math::Set<T, Context>(block_size, 0.0, padding_end_ptr, &context_); | 
|  | } | 
|  | GatherPadding<T>( | 
|  | outer_size, | 
|  | lengths_size, | 
|  | block_size, | 
|  | pad_width, | 
|  | in.template data<T>(), | 
|  | lengths_ptr, | 
|  | padding_start_ptr, | 
|  | padding_end_ptr); | 
|  | return true; | 
|  | } | 
|  |  | 
|  | private: | 
|  | template <typename T> | 
|  | void GatherPadding( | 
|  | const int outer_size, | 
|  | const int lengths_size, | 
|  | const int block_size, | 
|  | const int pad_width, | 
|  | const T* in_ptr, | 
|  | const int* lengths_ptr, | 
|  | T* padding_start_ptr, | 
|  | T* padding_end_ptr); | 
|  |  | 
|  | int startPaddingWidth_; | 
|  | int endPaddingWidth_; | 
|  | // Scratch space required by the CUDA version | 
|  | Tensor lengths_prefix_sum_buffer_{Context::GetDeviceType()}; | 
|  | Tensor lengths_prefix_sum_{Context::GetDeviceType()}; | 
|  | }; | 
|  |  | 
|  | template <class Context> | 
|  | class RemovePaddingOp final : public Operator<Context> { | 
|  | public: | 
|  | USE_OPERATOR_CONTEXT_FUNCTIONS; | 
|  | template <class... Args> | 
|  | explicit RemovePaddingOp(Args&&... args) | 
|  | : Operator<Context>(std::forward<Args>(args)...), | 
|  | startPaddingWidth_( | 
|  | this->template GetSingleArgument<int>("padding_width", 1)), | 
|  | endPaddingWidth_( | 
|  | this->template GetSingleArgument<int>("end_padding_width", -1)) { | 
|  | CAFFE_ENFORCE_GE(startPaddingWidth_, 0); | 
|  | if (endPaddingWidth_ < 0) { | 
|  | endPaddingWidth_ = startPaddingWidth_; | 
|  | } | 
|  | } | 
|  |  | 
|  | bool RunOnDevice() override { | 
|  | if (startPaddingWidth_ == 0 && endPaddingWidth_ == 0) { | 
|  | Output(0)->CopyFrom(Input(0), true /*async*/); | 
|  | if (OutputSize() == 2) { | 
|  | Output(1)->CopyFrom(Input(1), true /*async*/); | 
|  | } | 
|  | return true; | 
|  | } | 
|  | return DispatchHelper<TensorTypes<float, double, int, int64_t, bool>>::call( | 
|  | this, Input(0)); | 
|  | } | 
|  |  | 
|  | template <typename T> | 
|  | bool DoRunWithType(); | 
|  |  | 
|  | private: | 
|  | int startPaddingWidth_; | 
|  | int endPaddingWidth_; | 
|  |  | 
|  | // Scratch space required by the CUDA version | 
|  | Tensor lengths_prefix_sum_buffer_{Context::GetDeviceType()}; | 
|  | Tensor lengths_prefix_sum_{Context::GetDeviceType()}; | 
|  | }; | 
|  |  | 
|  | template <class Context> | 
|  | class AddPaddingOp final : public Operator<Context> { | 
|  | public: | 
|  | USE_OPERATOR_CONTEXT_FUNCTIONS; | 
|  | template <class... Args> | 
|  | explicit AddPaddingOp(Args&&... args) | 
|  | : Operator<Context>(std::forward<Args>(args)...), | 
|  | startPaddingWidth_( | 
|  | this->template GetSingleArgument<int>("padding_width", 1)), | 
|  | endPaddingWidth_( | 
|  | this->template GetSingleArgument<int>("end_padding_width", -1)) { | 
|  | CAFFE_ENFORCE_GE(startPaddingWidth_, 0); | 
|  | if (endPaddingWidth_ < 0) { | 
|  | endPaddingWidth_ = startPaddingWidth_; | 
|  | } | 
|  | } | 
|  |  | 
|  | bool RunOnDevice() override { | 
|  | if (startPaddingWidth_ == 0 && endPaddingWidth_ == 0) { | 
|  | Output(0)->CopyFrom(Input(0), true /*async*/); | 
|  | if (OutputSize() == 2) { | 
|  | Output(1)->CopyFrom(Input(1), true /*async*/); | 
|  | } | 
|  | return true; | 
|  | } | 
|  | return DispatchHelper<TensorTypes<float, double, int, int64_t, bool>>::call( | 
|  | this, Input(0)); | 
|  | } | 
|  |  | 
|  | template <typename T> | 
|  | bool DoRunWithType() { | 
|  | const auto& in = Input(0); | 
|  | CAFFE_ENFORCE_GE(in.dim(), 1); | 
|  | const int32_t outer_size = in.sizes()[0]; | 
|  | const auto block_size = in.size_from_dim(1); | 
|  |  | 
|  | // if no lengths is provided, assume it is a single full-span entry | 
|  | const int32_t* lengths_ptr = nullptr; | 
|  | int32_t lengths_size = 1; | 
|  | if (InputSize() > 1) { | 
|  | const auto& lengths = Input(1); | 
|  | lengths_ptr = lengths.template data<int32_t>(); | 
|  | lengths_size = lengths.numel(); | 
|  | } | 
|  |  | 
|  | // fetch paddings | 
|  | // input_size == 2 : pad with zeros | 
|  | // input_size == 3 : start and end paddings are the same | 
|  | // input_size == 4 : different start and end paddings | 
|  | const T* padding_start_ptr = nullptr; | 
|  | const T* padding_end_ptr = nullptr; | 
|  | if (InputSize() >= 3) { | 
|  | auto& padding_start = Input(2); | 
|  | CAFFE_ENFORCE_EQ(block_size, padding_start.numel()); | 
|  | padding_start_ptr = padding_start.template data<T>(); | 
|  | } | 
|  | if (InputSize() == 4) { | 
|  | auto& padding_end = Input(3); | 
|  | CAFFE_ENFORCE_EQ(block_size, padding_end.numel()); | 
|  | padding_end_ptr = padding_end.template data<T>(); | 
|  | } else { | 
|  | padding_end_ptr = padding_start_ptr; | 
|  | } | 
|  |  | 
|  | auto out_dims = in.sizes().vec(); | 
|  | out_dims[0] += (startPaddingWidth_ + endPaddingWidth_) * lengths_size; | 
|  | auto* out = Output(0, std::move(out_dims), at::dtype<T>()); | 
|  |  | 
|  | const auto* in_ptr = in.template data<T>(); | 
|  | auto* out_ptr = out->template mutable_data<T>(); | 
|  |  | 
|  | return MakePadding<T>( | 
|  | in_ptr, | 
|  | out_ptr, | 
|  | lengths_ptr, | 
|  | lengths_size, | 
|  | outer_size, | 
|  | padding_start_ptr, | 
|  | padding_end_ptr, | 
|  | block_size); | 
|  | } | 
|  |  | 
|  | private: | 
|  | template <typename T> | 
|  | bool MakePadding( | 
|  | const T* in_ptr, | 
|  | T* out_ptr, | 
|  | const int32_t* lengths_ptr, | 
|  | int32_t lengths_size, | 
|  | int32_t outer_size, | 
|  | const T* padding_start_ptr, | 
|  | const T* padding_end_ptr, | 
|  | int64_t block_size); | 
|  |  | 
|  | int startPaddingWidth_; | 
|  | int endPaddingWidth_; | 
|  |  | 
|  | // Scratch space required by the CUDA version | 
|  | Tensor lengths_prefix_sum_buffer_{Context::GetDeviceType()}; | 
|  | Tensor lengths_prefix_sum_{Context::GetDeviceType()}; | 
|  | }; | 
|  |  | 
|  | template <class Context> | 
|  | class PadEmptySamplesOp : public Operator<Context> { | 
|  | public: | 
|  | USE_OPERATOR_CONTEXT_FUNCTIONS; | 
|  | template <class... Args> | 
|  | explicit PadEmptySamplesOp(Args&&... args) | 
|  | : Operator<Context>(std::forward<Args>(args)...) {} | 
|  |  | 
|  | bool RunOnDevice() override; | 
|  | }; | 
|  |  | 
|  | } // namespace caffe2 | 
|  |  | 
|  | #endif // CAFFE2_OPERATORS_SEQUENCE_OPS_H_ |