| #include "caffe2/operators/sequence_ops.h" |
| #include "caffe2/core/operator.h" |
| #include "caffe2/core/tensor.h" |
| |
| namespace caffe2 { |
| |
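// GatherPaddingOp sums the padding slices of each range in an already-padded
// input. With a single output it returns the sum of all start and end
// paddings; with two outputs, the start-padding and end-padding sums are
// returned separately. It is used to compute the gradient of AddPadding
// with respect to its padding inputs.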
template <>
template <typename T>
bool GatherPaddingOp<CPUContext>::DoRunWithType() {
  const auto& in = Input(0);
  CAFFE_ENFORCE_GE(in.ndim(), 1);
  const int32_t outer_size = in.dims()[0];
  const auto block_size = std::accumulate(
      in.dims().begin() + 1, in.dims().end(), 1, std::multiplies<TIndex>());
  const auto pad_width = startPaddingWidth_ + endPaddingWidth_;

  // If no lengths input is provided, treat the input as a single
  // full-span range.
  const int32_t* lengths_ptr = &outer_size;
  int64_t lengths_size = 1;
  if (InputSize() > 1) {
    const auto& lengths = Input(1);
    lengths_ptr = lengths.data<int32_t>();
    lengths_size = lengths.size();
  }

  std::vector<TIndex> padShape(in.dims().begin() + 1, in.dims().end());
  // Output(0) accumulates the sum of the padding slices.
  Output(0)->Resize(padShape);
  T* padding_start_ptr = Output(0)->template mutable_data<T>();
  memset(padding_start_ptr, 0, sizeof(T) * block_size);

  // If no separate end-padding output is requested, accumulate end paddings
  // into the same output as start paddings.
  T* padding_end_ptr = padding_start_ptr;
  if (OutputSize() == 2) {
    Output(1)->Resize(padShape);
    padding_end_ptr = Output(1)->template mutable_data<T>();
    memset(padding_end_ptr, 0, sizeof(T) * block_size);
  }

  const auto* in_ptr = in.template data<T>();
  int64_t total_length = 0;
  for (int i = 0; i < lengths_size; ++i) {
    // check total length consistency
    const auto length = lengths_ptr[i];
    total_length += length;
    CAFFE_ENFORCE_LE(total_length, outer_size);

    // accumulate start paddings
    for (int j = 0; j < startPaddingWidth_; ++j) {
      for (int k = 0; k < block_size; ++k) {
        padding_start_ptr[k] += in_ptr[k];
      }
      in_ptr += block_size;
    }
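    // Skip the payload of the range; `length` counts the padding rows, so
    // the payload spans (length - pad_width) rows of block_size elements.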
    in_ptr += block_size * (length - pad_width);
    // accumulate end paddings
    for (int j = 0; j < endPaddingWidth_; ++j) {
      for (int k = 0; k < block_size; ++k) {
        padding_end_ptr[k] += in_ptr[k];
      }
      in_ptr += block_size;
    }
  }
  return true;
}

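// RemovePaddingOp strips startPaddingWidth_ rows from the start and
// endPaddingWidth_ rows from the end of every range, inverting AddPadding.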
template <>
template <typename T>
bool RemovePaddingOp<CPUContext>::DoRunWithType() {
  const auto& in = Input(0);
  CAFFE_ENFORCE_GE(in.ndim(), 1);
  const int32_t outer_size = in.dims()[0];
  const auto block_size = std::accumulate(
      in.dims().begin() + 1, in.dims().end(), 1, std::multiplies<TIndex>());
  const auto pad_width = startPaddingWidth_ + endPaddingWidth_;

  // If no lengths input is provided, treat the input as a single
  // full-span range.
  const int32_t* lengths_ptr = &outer_size;
  int64_t lengths_size = 1;
  if (InputSize() > 1) {
    const auto& lengths = Input(1);
    lengths_ptr = lengths.data<int32_t>();
    lengths_size = lengths.size();
  }

  auto* out = Output(0);
  {
    auto out_dims = in.dims();
    out_dims[0] -= pad_width * lengths_size;
    out->Resize(std::move(out_dims));
  }
  const auto* in_ptr = in.template data<T>();
  auto* out_ptr = out->template mutable_data<T>();
  int64_t total_length = 0;
  for (int i = 0; i < lengths_size; ++i) {
    // check that total length is consistent
    const auto length = lengths_ptr[i];
    total_length += length;
    CAFFE_ENFORCE_LE(total_length, outer_size);
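    // Copy only the payload, skipping startPaddingWidth_ rows at the front
    // and endPaddingWidth_ rows at the back of the range.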
    std::copy(
        in_ptr + block_size * startPaddingWidth_,
        in_ptr + block_size * (length - endPaddingWidth_),
        out_ptr);
    in_ptr += block_size * length;
    out_ptr += block_size * (length - pad_width);
  }
  if (OutputSize() == 1) {
    return true;
  }
  auto* lengths_out = Output(1);
  lengths_out->Resize(lengths_size);
  std::transform(
      lengths_ptr,
      lengths_ptr + lengths_size,
      lengths_out->mutable_data<int32_t>(),
      [pad_width](int32_t x) { return x - pad_width; });
  return true;
}

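// AddPaddingOp inserts startPaddingWidth_ rows before and endPaddingWidth_
// rows after every range, filled with zeros or with the provided start/end
// padding tensors.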
template <>
template <typename T>
bool AddPaddingOp<CPUContext>::DoRunWithType() {
  const auto& in = Input(0);
  CAFFE_ENFORCE_GE(in.ndim(), 1);
  const int32_t outer_size = in.dims()[0];
  const auto block_size = std::accumulate(
      in.dims().begin() + 1, in.dims().end(), 1, std::multiplies<TIndex>());

  // If no lengths input is provided, treat the input as a single
  // full-span range.
  const int32_t* lengths_ptr = &outer_size;
  int64_t lengths_size = 1;
  if (InputSize() > 1) {
    const auto& lengths = Input(1);
    lengths_ptr = lengths.data<int32_t>();
    lengths_size = lengths.size();
  }

  // fetch paddings:
  //   input_size <= 2: pad with zeros
  //   input_size == 3: start and end paddings are the same
  //   input_size == 4: separate start and end paddings
  const T* padding_start_ptr = nullptr;
  const T* padding_end_ptr = nullptr;
  if (InputSize() >= 3) {
    auto& padding_start = Input(2);
    CAFFE_ENFORCE_EQ(block_size, padding_start.size());
    padding_start_ptr = padding_start.template data<T>();
  }
  if (InputSize() == 4) {
    auto& padding_end = Input(3);
    CAFFE_ENFORCE_EQ(block_size, padding_end.size());
    padding_end_ptr = padding_end.template data<T>();
  } else {
    padding_end_ptr = padding_start_ptr;
  }

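  // The output outer dimension grows by (startPaddingWidth_ +
  // endPaddingWidth_) rows for every range.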
  auto* out = Output(0);
  {
    auto out_dims = in.dims();
    out_dims[0] += (startPaddingWidth_ + endPaddingWidth_) * lengths_size;
    out->Resize(std::move(out_dims));
  }
  const auto* in_ptr = in.template data<T>();
  auto* out_ptr = out->template mutable_data<T>();
  int64_t total_length = 0;
  for (int i = 0; i < lengths_size; ++i) {
    // check that total length is consistent
    const auto length = lengths_ptr[i];
    total_length += length;
    CAFFE_ENFORCE_LE(total_length, outer_size);
    // copy padding before
    if (!padding_start_ptr) {
      memset(out_ptr, 0, block_size * startPaddingWidth_ * sizeof(T));
      out_ptr += block_size * startPaddingWidth_;
    } else {
      for (int j = 0; j < startPaddingWidth_; ++j) {
        std::copy(padding_start_ptr, padding_start_ptr + block_size, out_ptr);
        out_ptr += block_size;
      }
    }
    // copy payload
    const auto num_elems = block_size * length;
    std::copy(in_ptr, in_ptr + num_elems, out_ptr);
    in_ptr += num_elems;
    out_ptr += num_elems;
    // copy padding after
    if (!padding_end_ptr) {
      memset(out_ptr, 0, block_size * endPaddingWidth_ * sizeof(T));
      out_ptr += block_size * endPaddingWidth_;
    } else {
      for (int j = 0; j < endPaddingWidth_; ++j) {
        std::copy(padding_end_ptr, padding_end_ptr + block_size, out_ptr);
        out_ptr += block_size;
      }
    }
  }
  if (OutputSize() == 1) {
    return true;
  }
  auto* lengths_out = Output(1);
  lengths_out->Resize(lengths_size);
  const auto pad_width = startPaddingWidth_ + endPaddingWidth_;
  std::transform(
      lengths_ptr,
      lengths_ptr + lengths_size,
      lengths_out->mutable_data<int32_t>(),
      [pad_width](int32_t x) { return x + pad_width; });
  return true;
}

template <>
bool PadEmptySamplesOp<CPUContext>::RunOnDevice() {
  auto& lengths = Input(0);
  auto* lengthsPtr = lengths.template data<int32_t>();
  CAFFE_ENFORCE(lengths.ndim() == 1, "LENGTH should be 1-D");
  CAFFE_ENFORCE(InputSize() >= 1, "Input size must be no less than 1");

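  // Output(0) is a copy of `lengths` in which every zero-length sample is
  // replaced by length 1; needPadding counts the empty samples and sumLen
  // the total number of input rows.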
  auto* out_lengths = Output(0);
  int needPadding = 0;
  int sumLen = 0;
  for (int i = 0; i < lengths.size(); ++i) {
    if (lengthsPtr[i] == 0) {
      needPadding++;
    }
    sumLen += lengthsPtr[i];
  }

  out_lengths->Resize(lengths.size());
  auto* outLengthsPtr = out_lengths->template mutable_data<int32_t>();
  for (int i = 0; i < lengths.size(); ++i) {
    if (lengthsPtr[i] == 0) {
      outLengthsPtr[i] = 1;
    } else {
      outLengthsPtr[i] = lengthsPtr[i];
    }
  }

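  // For every feature blob, rebuild the data with one zero-filled block
  // inserted in place of each empty sample.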
  for (int k = 0; k < InputSize() - 1; k++) {
    auto& features = Input(1 + k);
    CAFFE_ENFORCE(features.ndim() >= 1, "FEATURE should be at least 1-D");
    CAFFE_ENFORCE(
        features.dim(0) == sumLen, "FEATURE and LENGTH should be consistent");
    const auto block_size = features.size_from_dim(1);

    auto* out_features = Output(1 + k);
    auto outDim = features.dims();
    outDim.at(0) += needPadding;
    out_features->Resize(outDim);
    auto dst =
        static_cast<char*>(out_features->raw_mutable_data(features.meta()));
    auto src_base = static_cast<const char*>(features.raw_data());
    // Scratch block used to pad empty samples. raw_mutable_data only
    // allocates, so the block must be explicitly zeroed before use.
    Tensor<CPUContext> zero;
    zero.Resize(block_size);
    auto* zeroPtr = static_cast<char*>(zero.raw_mutable_data(features.meta()));
    memset(zeroPtr, 0, block_size * features.meta().itemsize());
    int start_dest = 0;
    int start_src = 0;
    for (int i = 0; i < lengths.size(); ++i) {
      if (lengthsPtr[i] == 0) {
        context_.template CopyItems<CPUContext, CPUContext>(
            features.meta(),
            block_size,
            zeroPtr,
            dst + start_dest * features.meta().itemsize());
        start_dest += block_size;
      } else {
        auto src = src_base + start_src * features.meta().itemsize();
        context_.template CopyItems<CPUContext, CPUContext>(
            features.meta(),
            lengthsPtr[i] * block_size,
            src,
            dst + start_dest * features.meta().itemsize());
        start_src += lengthsPtr[i] * block_size;
        start_dest += lengthsPtr[i] * block_size;
      }
    }
  }
  return true;
}

REGISTER_CPU_OPERATOR(AddPadding, AddPaddingOp<CPUContext>);
REGISTER_CPU_OPERATOR(RemovePadding, RemovePaddingOp<CPUContext>);
REGISTER_CPU_OPERATOR(GatherPadding, GatherPaddingOp<CPUContext>);
REGISTER_CPU_OPERATOR(PadEmptySamples, PadEmptySamplesOp<CPUContext>);

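// The gradient of AddPadding has two parts: the gradient w.r.t. the data is
// the output gradient with the padding removed (RemovePadding), and the
// gradient w.r.t. each padding tensor is the sum of the output-gradient
// slices that the padding was broadcast into (GatherPadding).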
struct GetAddPaddingGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    // Pass lengths to the gradient ops only if they were given as input.
    vector<std::string> g_inputs{GO(0)};
    if (Def().input_size() > 1) {
      CAFFE_ENFORCE(Def().output_size() > 1);
      g_inputs.push_back(O(1));
    }

    vector<OperatorDef> ops;
    // gradient on the data
    ops.push_back(CreateOperatorDef(
        "RemovePadding", "", g_inputs, vector<string>{GI(0)}));
    // gradient on the start_padding (and end_padding)
    if (Def().input_size() >= 3) {
      std::vector<string> padding_grads{GI(2)};
      if (Def().input_size() == 4) {
        padding_grads.push_back(GI(3));
      }
      auto g_inputs2 = g_inputs;
      ops.push_back(
          CreateOperatorDef("GatherPadding", "", g_inputs2, padding_grads));
    }
    return ops;
  }
};
REGISTER_GRADIENT(AddPadding, GetAddPaddingGradient);

struct GetRemovePaddingGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    // Pass lengths to the gradient op only if they were given as input.
    vector<std::string> g_inputs{GO(0)};
    if (Def().input_size() > 1) {
      CAFFE_ENFORCE(Def().output_size() > 1);
      g_inputs.push_back(O(1));
    }

    return SingleGradientDef("AddPadding", "", g_inputs, vector<string>{GI(0)});
  }
};
REGISTER_GRADIENT(RemovePadding, GetRemovePaddingGradient);

OPERATOR_SCHEMA(AddPadding)
    .NumInputs(1, 4)
    .NumOutputs(1, 2)
    .SetDoc(R"DOC(
Given a partitioned tensor T<N, D1..., Dn>, where the partitions are
defined as ranges on its outer-most (slowest varying) dimension N,
with given range lengths, return a tensor with paddings added to the start
and end of each range; the outer dimension grows by
(padding_width + end_padding_width) for every range.
Optionally, different paddings can be provided for the beginning and end.
Provided paddings must be tensors T<D1..., Dn>.

If no padding is provided, zero padding is added.
If no lengths vector is provided, the data is treated as a single range,
and padding is added only once, at the start and end of the data.
)DOC")
    .Arg(
        "padding_width",
        "Number of copies of padding to add around each range.")
    .Arg(
        "end_padding_width",
        "(Optional) Specifies a different end-padding width.")
    .Input(0, "data_in", "(T<N, D1..., Dn>) Input data")
    .Input(
        1,
        "lengths",
        "(i32) Number of elements in each range. sum(lengths) = N.")
    .Input(2, "start_padding", "T<D1..., Dn> Padding data for range start.")
    .Input(
        3,
        "end_padding",
        "T<D1..., Dn> (optional) Padding for range end. "
        "If not provided, start_padding is used as end_padding as well.")
    .Output(0, "data_out", "(T<N + 2*padding_width, D1..., Dn>) Padded data.")
    .Output(1, "lengths_out", "(i32, optional) Lengths for each padded range.");
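
// Illustration of AddPadding semantics (hypothetical values, not compiled):
// with padding_width = 1 and no explicit padding inputs (zero padding),
//   data_in  = [[1, 2], [3, 4], [5, 6]]   (N = 3, block = D1 = 2)
//   lengths  = [3]
// produces
//   data_out    = [[0, 0], [1, 2], [3, 4], [5, 6], [0, 0]]
//   lengths_out = [5]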

OPERATOR_SCHEMA(RemovePadding)
    .NumInputs(1, 2)
    .NumOutputs(1, 2)
    .SetDoc(R"DOC(
Remove padding around the edges of each segment of the input data. This is
the reverse operation of AddPadding, and uses the same arguments and
conventions for input and output data format.
)DOC")
    .Arg("padding_width", "Outer-size of padding to remove around each range.")
    .Arg(
        "end_padding_width",
        "(Optional) Specifies a different end-padding width.")
    .Input(0, "data_in", "T<N, D1..., Dn> Input data")
    .Input(
        1,
        "lengths",
        "(i32) Number of elements in each range. sum(lengths) = N. "
        "If not provided, considers all data as a single segment.")
    .Output(0, "data_out", "(T<N - 2*padding_width, D1..., Dn>) Unpadded data.")
    .Output(
        1,
        "lengths_out",
        "(i32, optional) Lengths for each unpadded range.");

OPERATOR_SCHEMA(GatherPadding)
    .NumInputs(2)
    .NumOutputs(1, 2)
    .SetDoc(R"DOC(
Gather the sum of start and end paddings in a padded input sequence. Used in
order to compute the gradients of AddPadding w.r.t the padding tensors.
)DOC")
    .Arg("padding_width", "Outer-size of padding present around each range.")
    .Arg(
        "end_padding_width",
        "(Optional) Specifies a different end-padding width.")
    .Input(0, "data_in", "T<N, D1..., Dn> Padded input data")
    .Input(
        1,
        "lengths",
        "(i32) Number of elements in each range. sum(lengths) = N. "
        "If not provided, considers all data as a single segment.")
    .Output(
        0,
        "padding_sum",
        "T<D1..., Dn> Sum of all start paddings, or of all "
        "paddings if end_padding_sum is not requested.")
    .Output(
        1,
        "end_padding_sum",
        "T<D1..., Dn> Sum of all end paddings, if requested.");

OPERATOR_SCHEMA(PadEmptySamples)
    .NumInputs(1, INT_MAX)
    .NumOutputs(1, INT_MAX)
    .SetDoc(R"DOC(
Pad empty samples given lengths and feature tensors: every sample with
length zero has a single zero-filled element inserted in its place, and its
length is set to 1 in the output lengths.

Input(0) is a blob containing the lengths of the samples in one batch;
[Input(1), ..., Input(num_fields)] is a list of tensors containing the data
for each field of the features.

PadEmptySamples is thread safe.
)DOC")
    .Input(0, "lengths", "A blob containing the lengths of the samples.")
    .Output(
        0,
        "out_lengths",
        "Tensor containing the lengths with empty samples padded.");

} // namespace caffe2