/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_CORE_KERNELS_MIRROR_PAD_OP_H_
#define TENSORFLOW_CORE_KERNELS_MIRROR_PAD_OP_H_

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/platform/types.h"

namespace Eigen {
template <typename PaddingDimensions, typename XprType>
class TensorMirrorPadOp;

namespace internal {
template <typename PaddingDimensions, typename XprType>
struct traits<TensorMirrorPadOp<PaddingDimensions, XprType>>
    : public traits<XprType> {
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static constexpr int NumDimensions = XprTraits::NumDimensions;
  static constexpr int Layout = XprTraits::Layout;
};

template <typename PaddingDimensions, typename XprType>
struct eval<TensorMirrorPadOp<PaddingDimensions, XprType>, Eigen::Dense> {
  typedef const TensorMirrorPadOp<PaddingDimensions, XprType>& type;
};

template <typename PaddingDimensions, typename XprType>
struct nested<
    TensorMirrorPadOp<PaddingDimensions, XprType>, 1,
    typename eval<TensorMirrorPadOp<PaddingDimensions, XprType>>::type> {
  typedef TensorMirrorPadOp<PaddingDimensions, XprType> type;
};
}  // namespace internal

template <typename PaddingDimensions, typename XprType>
class TensorMirrorPadOp
    : public TensorBase<TensorMirrorPadOp<PaddingDimensions, XprType>,
                        ReadOnlyAccessors> {
 public:
  typedef typename Eigen::internal::traits<TensorMirrorPadOp>::Scalar Scalar;
  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename Eigen::internal::nested<TensorMirrorPadOp>::type Nested;
  typedef typename Eigen::internal::traits<TensorMirrorPadOp>::StorageKind
      StorageKind;
  typedef typename Eigen::internal::traits<TensorMirrorPadOp>::Index Index;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorMirrorPadOp(
      const XprType& expr, const PaddingDimensions& padding_dims, Index offset)
      : xpr_(expr), padding_dims_(padding_dims), offset_(offset) {}

  EIGEN_DEVICE_FUNC
  const PaddingDimensions& padding() const { return padding_dims_; }

  EIGEN_DEVICE_FUNC
  Index offset() const { return offset_; }

  EIGEN_DEVICE_FUNC
  const typename internal::remove_all<typename XprType::Nested>::type&
  expression() const {
    return xpr_;
  }

 protected:
  typename XprType::Nested xpr_;
  const PaddingDimensions padding_dims_;
  const Index offset_;
};

// Eval as rvalue
template <typename PaddingDimensions, typename ArgType, typename Device>
struct TensorEvaluator<const TensorMirrorPadOp<PaddingDimensions, ArgType>,
                       Device> {
  typedef TensorMirrorPadOp<PaddingDimensions, ArgType> XprType;
  typedef typename XprType::Index Index;
  static constexpr int Dims = internal::array_size<PaddingDimensions>::value;
  typedef DSizes<Index, Dims> Dimensions;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  // Copied from Eigen3 Github version 0e806c1.
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;

  enum {
    IsAligned = false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess = false,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = true,
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op,
                                                        const Device& device)
      : impl_(op.expression(), device), padding_(op.padding()) {
    EIGEN_STATIC_ASSERT(Dims > 0, YOU_MADE_A_PROGRAMMING_MISTAKE)

    // op.offset() == 0 if padding mode is symmetric.
    // op.offset() == 1 if padding mode is reflect.
    eigen_assert(op.offset() == 0 || op.offset() == 1);
    left_offset_ = -1 + op.offset();
    right_offset_ = -1 - op.offset();
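    // Illustrative note (not part of the original source): these offsets feed
    // the coordinate reflection in ToInputCoord() below. For SYMMETRIC padding
    // (offset == 0) both offsets are -1, so the boundary element is repeated;
    // for REFLECT padding (offset == 1) they are 0 and -2, so the reflection
    // starts one element inside the boundary.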

    // This should trigger a compilation error if the padding dimensions and
    // the expression dimensions do not match.
    dimensions_ = impl_.dimensions();
    for (int dim = 0; dim < Dims; ++dim) {
      eigen_assert(padding_[dim].first + op.offset() <= dimensions_[dim]);
      eigen_assert(padding_[dim].second + op.offset() <= dimensions_[dim]);
      dimensions_[dim] += padding_[dim].first + padding_[dim].second;
    }

    const auto& input_dims = impl_.dimensions();
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      input_strides_[0] = 1;
      output_strides_[0] = 1;
      for (int i = 0; i < Dims - 1; ++i) {
        input_strides_[i + 1] = input_strides_[i] * input_dims[i];
        output_strides_[i + 1] = output_strides_[i] * dimensions_[i];
      }
    } else {
      input_strides_[numext::maxi(0, Dims - 1)] = 1;
      output_strides_[numext::maxi(0, Dims - 1)] = 1;
      for (int i = Dims - 1; i > 0; --i) {
        input_strides_[i - 1] = input_strides_[i] * input_dims[i];
        output_strides_[i - 1] = output_strides_[i] * dimensions_[i];
      }
    }
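    // Worked example (illustrative, not part of the original source): for a
    // column-major input of dimensions [4, 5] with paddings {(2, 2), (1, 1)},
    // the padded dimensions_ become [8, 7], input_strides_ == {1, 4}, and
    // output_strides_ == {1, 8}.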
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const {
    return dimensions_;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) {
    impl_.evalSubExprsIfNeeded(nullptr);
    return true;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { impl_.cleanup(); }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType
  coeff(Index index) const {
    eigen_assert(index < dimensions().TotalSize());
    const Index input_index = ToInputIndex(index);
    return impl_.coeff(input_index);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType
  coeff(array<Index, Dims> coords) const {
    for (int dim = 0; dim < Dims; ++dim) {
      coords[dim] = ToInputCoord(coords[dim], dim);
    }
    ReadInputHelper<TensorEvaluator<ArgType, Device>::CoordAccess> helper;
    return helper(coords, input_strides_, impl_);
  }

  template <int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType
  packet(Index index) const {
    constexpr int kPacketSize =
        internal::unpacket_traits<PacketReturnType>::size;

    EIGEN_STATIC_ASSERT(kPacketSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index + kPacketSize <= dimensions().TotalSize());

    // Find the effective inner-most dimension where padding actually happens.
    // NOTE: This is independent of the index argument and could be done in the
    // constructor to save computation. However, if packet access never
    // happens, moving it to the constructor would incur needless overhead.
    int dim = -1;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int k = 0; k < Dims; ++k) {
        if (padding_[k].first != 0 || padding_[k].second != 0) {
          dim = k;
          break;
        }
      }
    } else {
      for (int k = Dims - 1; k >= 0; --k) {
        if (padding_[k].first != 0 || padding_[k].second != 0) {
          dim = k;
          break;
        }
      }
    }

    const Index input_index = ToInputIndex(index);

    // If dim < 0, there is no padding at all.
    if (dim < 0) {
      return impl_.template packet<Unaligned>(input_index);
    }

    // Check whether the entire packet lies within a contiguous, unpadded
    // region. That is, every index in the packet must fall between the left
    // and right padded regions of the effective inner-most padded dimension.
    const Index left = padding_[dim].first * output_strides_[dim];
    const Index right =
        (dimensions_[dim] - padding_[dim].second) * output_strides_[dim];

    if (left <= index && (index + kPacketSize - 1) < right) {
      return impl_.template packet<Unaligned>(input_index);
    }
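    // Worked example (illustrative, not part of the original source): for a
    // 1-D input of length 4 padded by (2, 2), the output has length 8,
    // output_strides_[0] == 1, left == 2, and right == 6. A packet of four
    // elements starting at index 2 covers indices 2..5, all inside [2, 6), so
    // it can be loaded directly from the input; a packet starting at index 1
    // cannot.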

    // The packet straddles a padded region, so fall back to reading the
    // coefficients one at a time via coeff().
    EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type
        values[kPacketSize];
    values[0] = impl_.coeff(input_index);
    for (int i = 1; i < kPacketSize; ++i) {
      values[i] = coeff(index + i);
    }
    PacketReturnType result = internal::pload<PacketReturnType>(values);
    return result;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    constexpr int kPacketSize =
        internal::unpacket_traits<PacketReturnType>::size;

    const double compute_cost = Dims * (7 * TensorOpCost::AddCost<Index>() +
                                        2 * TensorOpCost::MulCost<Index>() +
                                        TensorOpCost::DivCost<Index>());
    return impl_.costPerCoeff(vectorized) +
           TensorOpCost(1, 0, compute_cost, vectorized, kPacketSize);
  }

  EIGEN_DEVICE_FUNC Scalar* data() const { return nullptr; }

 protected:
  using Coords = array<Index, Dims>;

  // Full template specialization is not allowed within a non-fully-specialized
  // template class. A dummy parameter is added to make the specializations
  // partial.
  template <bool CoordAccess, bool dummy = true>
  struct ReadInputHelper;

  template <bool dummy>
  struct ReadInputHelper<false, dummy> {
    template <typename Eval>
    EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE CoeffReturnType
    operator()(const Coords& coord, const Coords& strides, const Eval& eval) {
      Index index = 0;
      for (int k = 0; k < Dims; ++k) {
        index += coord[k] * strides[k];
      }
      return eval.coeff(index);
    }
  };

  template <bool dummy>
  struct ReadInputHelper<true, dummy> {
    template <typename Eval>
    EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE CoeffReturnType
    operator()(const Coords& coord, const Coords& strides, const Eval& eval) {
      return eval.coeff(coord);
    }
  };

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index ToInputCoord(Index k,
                                                           int dim) const {
    const Index m = impl_.dimensions()[dim];
    k -= padding_[dim].first;
    if (k < 0) {
      return -k + left_offset_;
    }
    if (k < m) {
      return k;
    }
    return m - (k - m) + right_offset_;
  }
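  // Worked example (illustrative, not part of the original source): for an
  // input dimension of size m == 4 padded by (2, 2), REFLECT mode
  // (left_offset_ == 0, right_offset_ == -2) maps output coordinates 0..7 to
  // input coordinates 2, 1, 0, 1, 2, 3, 2, 1, while SYMMETRIC mode
  // (left_offset_ == -1, right_offset_ == -1) maps them to
  // 1, 0, 0, 1, 2, 3, 3, 2.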

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index
  ToInputIndex(const Coords& coords) const {
    Index input_index = 0;
    for (int dim = 0; dim < Dims; ++dim) {
      input_index += ToInputCoord(coords[dim], dim) * input_strides_[dim];
    }
    return input_index;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index ToInputIndex(Index index) const {
    Index input_index = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int dim = Dims - 1; dim > 0; --dim) {
        const Index k = index / output_strides_[dim];
        index -= k * output_strides_[dim];
        input_index += ToInputCoord(k, dim) * input_strides_[dim];
      }
      input_index += ToInputCoord(index, 0);
    } else {
      for (int dim = 0; dim < Dims - 1; ++dim) {
        const Index k = index / output_strides_[dim];
        index -= k * output_strides_[dim];
        input_index += ToInputCoord(k, dim) * input_strides_[dim];
      }
      input_index += ToInputCoord(index, Dims - 1);
    }

    return input_index;
  }
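  // Worked example (illustrative), continuing the column-major [4, 5] input
  // padded by {(2, 2), (1, 1)} from the constructor comment above: output
  // index 19 splits into output coordinates (3, 2) using
  // output_strides_ == {1, 8}; these map to input coordinates (1, 1), i.e.
  // input index 1 + 1 * 4 == 5.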

  TensorEvaluator<ArgType, Device> impl_;
  PaddingDimensions padding_;
  Dimensions dimensions_;
  array<Index, Dims> input_strides_;
  array<Index, Dims> output_strides_;

  Index left_offset_;
  Index right_offset_;
};
}  // namespace Eigen

namespace tensorflow {
namespace functor {

// The offset argument must be either 0 or 1. It controls whether the boundary
// values are replicated (offset == 0, SYMMETRIC mode) or not replicated
// (offset == 1, REFLECT mode).
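// Worked example (illustrative, not part of the original source): padding the
// 1-D tensor [1, 2, 3] with paddings = [[2, 2]] yields
//   offset == 0 (SYMMETRIC): [2, 1, 1, 2, 3, 3, 2]
//   offset == 1 (REFLECT):   [3, 2, 1, 2, 3, 2, 1]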
template <typename Device, typename T, typename Tpaddings, int Dims>
struct MirrorPad {
  void operator()(const Device& device,
                  typename TTypes<T, Dims, int32>::Tensor output,
                  typename TTypes<T, Dims, int32>::ConstTensor input,
                  typename TTypes<Tpaddings>::ConstMatrix padding, int offset) {
    Eigen::array<Eigen::IndexPair<int32>, Dims> padding_dims;

    for (int i = 0; i < Dims; ++i) {
      padding_dims[i] = Eigen::IndexPair<int32>(padding(i, 0), padding(i, 1));
    }

    output.device(device) = MirrorPadOp(input, padding_dims, offset);
  }

  template <typename PaddingDimensions, typename Derived>
  static const Eigen::TensorMirrorPadOp<PaddingDimensions, const Derived>
  MirrorPadOp(
      const Eigen::TensorBase<Derived, Eigen::ReadOnlyAccessors>& tensor,
      const PaddingDimensions& padding, int offset) {
    return Eigen::TensorMirrorPadOp<PaddingDimensions, const Derived>(
        static_cast<const Derived&>(tensor), padding, offset);
  }
};
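
// A minimal usage sketch (illustrative, not part of the original source; the
// exact kernel wiring, including To32Bit and the surrounding OpKernel code,
// is an assumption and may differ from the real call sites):
//
//   functor::MirrorPad<CPUDevice, T, int32, Dims>()(
//       context->eigen_device<CPUDevice>(),
//       To32Bit(output_tensor->tensor<T, Dims>()),
//       To32Bit(input_tensor.tensor<T, Dims>()),
//       paddings_tensor.matrix<int32>(), offset);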

// The offset argument must be either 0 or 1. It controls whether the boundary
// values are replicated (offset == 0, SYMMETRIC mode) or not replicated
// (offset == 1, REFLECT mode).
template <typename Device, typename T, typename Tpaddings, int Dims>
struct MirrorPadGrad {
  void operator()(const Device& device,
                  typename TTypes<T, Dims, int32>::Tensor output,
                  typename TTypes<T, Dims, int32>::ConstTensor input,
                  typename TTypes<Tpaddings>::ConstMatrix paddings, int offset,
                  typename TTypes<T, Dims, int32>::Tensor scratch) {
    // Copy the gradient input into the scratch buffer.
    scratch.device(device) = input;

    Eigen::array<int32, Dims> lhs_offsets;
    Eigen::array<int32, Dims> rhs_offsets;
    Eigen::array<int32, Dims> extents;
    Eigen::array<bool, Dims> reverses;

    for (int i = 0; i < Dims; ++i) {
      lhs_offsets[i] = 0;
      rhs_offsets[i] = 0;
      extents[i] = scratch.dimension(i);
      reverses[i] = false;
    }

    // At this point, the central (non-padded) part does not yet include the
    // gradients back-propagated through the padded areas. Those gradient
    // components need to be added to the central part.
    //
    // Note that a gradient input element falls into a padded area iff, in at
    // least one dimension i, the coordinate x(i) is in the Python-style range
    // [:paddings(i,0)] or [-paddings(i,1):].
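    // Worked example (illustrative, not part of the original source): in 1-D
    // with paddings = [[2, 2]] and offset == 1 (REFLECT), the padded gradient
    // has length 7; the loop below adds reverse(scratch[0:2]) into
    // scratch[3:5] and reverse(scratch[5:7]) into scratch[2:4], which mirrors
    // the forward mapping in which padded coordinates 0 and 1 were read from
    // input coordinates 2 and 1.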

    for (int i = 0; i < Dims; ++i) {
      reverses[i] = true;

      // This handles the case where the coordinate in dimension i is in the
      // range [:paddings(i,0)]. This portion is added to the range
      // [paddings(i,0) + offset:2 * paddings(i,0) + offset].
      if (paddings(i, 0) > 0) {
        rhs_offsets[i] = 0;
        lhs_offsets[i] = paddings(i, 0) + offset;
        extents[i] = paddings(i, 0);

        scratch.slice(lhs_offsets, extents).device(device) +=
            scratch.slice(rhs_offsets, extents).reverse(reverses);
      }

      // This handles the case where the coordinate in dimension i is in the
      // range [-paddings(i,1):]. This portion is added to the range
      // [-2 * paddings(i,1) - offset:-paddings(i,1) - offset].
      if (paddings(i, 1) > 0) {
        rhs_offsets[i] = scratch.dimension(i) - paddings(i, 1);
        lhs_offsets[i] = rhs_offsets[i] - paddings(i, 1) - offset;
        extents[i] = paddings(i, 1);

        scratch.slice(lhs_offsets, extents).device(device) +=
            scratch.slice(rhs_offsets, extents).reverse(reverses);
      }

      reverses[i] = false;
      lhs_offsets[i] = paddings(i, 0);
      rhs_offsets[i] = paddings(i, 0);
      extents[i] = output.dimension(i);

      // At this point, the scratch buffer contains the gradient input as if
      // the paddings for dimensions k = 0,...,i were zero. Therefore, once the
      // loop terminates, the central part of the scratch buffer contains the
      // folded gradients.
    }

    // Copy the central part of the scratch buffer to the output.
    output.device(device) = scratch.slice(rhs_offsets, extents);
  }
};
}  // namespace functor
}  // namespace tensorflow

#endif  // TENSORFLOW_CORE_KERNELS_MIRROR_PAD_OP_H_