| #pragma once |
| |
| #include <c10/util/Optional.h> |
| #include <ATen/core/TensorBody.h> |
| #include <ATen/ExpandUtils.h> |
| #include <ATen/Functions.h> |
| |
| namespace at { |
| namespace indexing { |
| |
| const int64_t INDEX_MAX = std::numeric_limits<int64_t>::max(); |
| const int64_t INDEX_MIN = std::numeric_limits<int64_t>::min(); |
| |
| enum class TensorIndexType { None, Ellipsis, Integer, Boolean, Slice, Tensor }; |
| |
| constexpr c10::nullopt_t None{c10::nullopt_t::init()}; |
| |
| struct CAFFE2_API EllipsisIndexType final { EllipsisIndexType() {} }; |
| CAFFE2_API extern const EllipsisIndexType Ellipsis; |
| |
| struct CAFFE2_API Slice final { |
| public: |
| // This mirrors `__PySlice_Unpack` in torch/csrc/utils/python_compat.h |
| Slice( |
| c10::optional<int64_t> start_index = c10::nullopt, |
| c10::optional<int64_t> stop_index = c10::nullopt, |
| c10::optional<int64_t> step_index = c10::nullopt) { |
| if (!step_index.has_value()) { |
| step_ = 1; |
| } else { |
| step_ = step_index.value(); |
| TORCH_CHECK_VALUE(step_ != 0, "slice step cannot be zero"); |
| |
| // Here step might be -INDEX_MAX-1; in this case we replace it |
| // with -INDEX_MAX. This doesn't affect the semantics, and it |
| // guards against later undefined behaviour resulting from code that |
| // does "step = -step" as part of a slice reversal. |
| if (step_ < -INDEX_MAX) |
| step_ = -INDEX_MAX; |
| } |
| if (!start_index.has_value()) { |
| start_ = step_ < 0 ? INDEX_MAX : 0; |
| } else { |
| start_ = start_index.value(); |
| } |
| if (!stop_index.has_value()) { |
| stop_ = step_ < 0 ? INDEX_MIN : INDEX_MAX; |
| } else { |
| stop_ = stop_index.value(); |
| } |
| } |
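| |
| // For reference, a sketch of how the defaults above resolve (the values follow |
| // directly from the constructor logic): |
| // ``` |
| // Slice();               // start = 0,         stop = INDEX_MAX, step = 1 |
| // Slice(1, None, 2);     // start = 1,         stop = INDEX_MAX, step = 2 |
| // Slice(None, None, -1); // start = INDEX_MAX, stop = INDEX_MIN, step = -1 |
| // ``` |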
| |
| inline int64_t start() const { |
| return start_; |
| } |
| |
| inline int64_t stop() const { |
| return stop_; |
| } |
| |
| inline int64_t step() const { |
| return step_; |
| } |
| |
| private: |
| int64_t start_; |
| int64_t stop_; |
| int64_t step_; |
| }; |
| |
| CAFFE2_API std::ostream& operator<<(std::ostream& stream, const Slice& slice); |
| |
| // `at::indexing::TensorIndex` is used to convert C++ tensor indices such as |
| // `{None, "...", Ellipsis, 0, true, Slice(1, None, 2), torch::tensor({1, 2})}` |
| // into their equivalent `std::vector<TensorIndex>`, so that further tensor indexing |
| // operations can be performed using the supplied indices. |
| // |
| // There is a one-to-one correspondence between Python and C++ tensor index types: |
| // Python | C++ |
| // ----------------------------------------------------- |
| // `None` | `at::indexing::None` |
| // `Ellipsis` | `at::indexing::Ellipsis` |
| // `...` | `"..."` |
| // `123` | `123` |
| // `True` / `False` | `true` / `false` |
| // `:` | `Slice()` / `Slice(None, None)` |
| // `::` | `Slice()` / `Slice(None, None, None)` |
| // `1:` | `Slice(1, None)` |
| // `1::` | `Slice(1, None, None)` |
| // `:3` | `Slice(None, 3)` |
| // `:3:` | `Slice(None, 3, None)` |
| // `::2` | `Slice(None, None, 2)` |
| // `1:3` | `Slice(1, 3)` |
| // `1::2` | `Slice(1, None, 2)` |
| // `:3:2` | `Slice(None, 3, 2)` |
| // `1:3:2` | `Slice(1, 3, 2)` |
| // `torch.tensor([1, 2])` | `torch::tensor({1, 2})` |
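| // |
| // Usage sketch (a non-authoritative example; it assumes a defined tensor `x`, a tensor `v` of a |
| // compatible shape, and the `Tensor::index` / `Tensor::index_put_` overloads of the libtorch |
| // indexing API that accept these indices): |
| // ``` |
| // using namespace at::indexing; |
| // // Python: `x[None, ..., 0, True, 1:3:2]` |
| // Tensor y = x.index({None, "...", 0, true, Slice(1, 3, 2)}); |
| // // Python: `x[:, 0] = v` |
| // x.index_put_({Slice(), 0}, v); |
| // ``` |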
| struct CAFFE2_API TensorIndex final { |
| // Case 1: `at::indexing::None` |
| TensorIndex(c10::nullopt_t) : type_(TensorIndexType::None) {} |
| |
| // Case 2: "..." / `at::indexing::Ellipsis` |
| TensorIndex(at::indexing::EllipsisIndexType) : type_(TensorIndexType::Ellipsis) {} |
| TensorIndex(const char *str) : TensorIndex(at::indexing::Ellipsis) { |
| TORCH_CHECK_VALUE( |
| strcmp(str, "...") == 0, |
| "Expected \"...\" to represent an ellipsis index, but got \"", str, "\""); |
| } |
| |
| // Case 3: Integer value |
| TensorIndex(int64_t integer) : integer_(integer), type_(TensorIndexType::Integer) {} |
| TensorIndex(int integer) : TensorIndex((int64_t)integer) {} |
| |
| // Case 4: Boolean value |
| template <class T, |
| class = typename std::enable_if<std::is_same<bool, T>::value>::type > |
| TensorIndex(T boolean) : boolean_(boolean), type_(TensorIndexType::Boolean) {} |
| |
| // Case 5: Slice represented in `at::indexing::Slice` form |
| TensorIndex(Slice slice) : slice_(std::move(slice)), type_(TensorIndexType::Slice) {} |
| |
| // Case 6: Tensor value |
| TensorIndex(Tensor tensor) : tensor_(std::move(tensor)), type_(TensorIndexType::Tensor) {} |
| |
| inline bool is_none() const { |
| return type_ == TensorIndexType::None; |
| } |
| |
| inline bool is_ellipsis() const { |
| return type_ == TensorIndexType::Ellipsis; |
| } |
| |
| inline bool is_integer() const { |
| return type_ == TensorIndexType::Integer; |
| } |
| |
| inline int64_t integer() const { |
| return integer_; |
| } |
| |
| inline bool is_boolean() const { |
| return type_ == TensorIndexType::Boolean; |
| } |
| |
| inline bool boolean() const { |
| return boolean_; |
| } |
| |
| inline bool is_slice() const { |
| return type_ == TensorIndexType::Slice; |
| } |
| |
| inline const Slice& slice() const { |
| return slice_; |
| } |
| |
| inline bool is_tensor() const { |
| return type_ == TensorIndexType::Tensor; |
| } |
| |
| inline const Tensor& tensor() const { |
| return tensor_; |
| } |
| |
| private: |
| int64_t integer_; |
| bool boolean_; |
| Slice slice_; |
| Tensor tensor_; |
| TensorIndexType type_; |
| }; |
| |
| CAFFE2_API std::ostream& operator<<(std::ostream& stream, const TensorIndex& tensor_index); |
| CAFFE2_API std::ostream& operator<<(std::ostream& stream, const std::vector<TensorIndex>& tensor_indices); |
| |
| namespace impl { |
| static inline Tensor applySlice( |
| const Tensor& self, |
| int64_t dim, |
| int64_t start, |
| int64_t stop, |
| int64_t step, |
| bool disable_slice_optimization, |
| const at::Device& self_device, |
| const IntArrayRef& self_sizes) { |
| // TODO: implement negative step |
| TORCH_CHECK_VALUE(step > 0, "step must be greater than zero"); |
| |
| // Skip this optimization if we are tracing, as the trace may be polymorphic |
| // over the shape of the `self` tensor, and we still want to record |
| // the slice (this is what the `disable_slice_optimization` flag controls). |
| int64_t length = (self_device == at::kCPU || self_device == at::kCUDA) ? self_sizes[dim] : self.size(dim); |
| if (!disable_slice_optimization && start == 0 && stop == length && step == 1) { |
| return self; |
| } |
| return self.slice(dim, start, stop, step); |
| } |
| |
| static inline Tensor applySelect( |
| const Tensor& self, |
| int64_t dim, |
| int64_t index, |
| int64_t real_dim, |
| const at::Device& self_device, |
| const IntArrayRef& self_sizes) { |
| TORCH_CHECK_INDEX( |
| !(index == 0 && dim == 0 && self_sizes.size() == 0), |
| "invalid index of a 0-dim tensor. ", |
| "Use `tensor.item()` in Python or `tensor.item<T>()` in C++ to convert a 0-dim tensor to a number"); |
| |
| int64_t size = self_sizes[dim]; |
| TORCH_CHECK_INDEX( |
| index >= -size && index < size, |
| "index ", index, " is out of bounds for dimension ", real_dim, " with size ", size); |
| |
| // If the index is negative, do not normalize it here, because doing so would bake |
| // the current tensor size into the trace. |
| // aten::select also works on negative indices. |
| return self.select(dim, index); |
| } |
| |
| static inline Tensor boolToIndexingTensorCPUOrCUDA(const Tensor& self, bool value) { |
| // Booleans add a dimension of size 1: `true` indexes this new dimension as if it were `0:`, `false` as if it were empty. |
| if (value) { |
| return at::native::zeros({1}, {}, self.options().dtype(kLong)); |
| } else { |
| return at::native::empty({0}, {}, self.options().dtype(kLong)); |
| } |
| } |
| |
| static inline Tensor boolToIndexingTensorNonNativeDeviceType(const Tensor& self, bool value) { |
| // Booleans add a dimension of size 1: `true` indexes this new dimension as if it were `0:`, `false` as if it were empty. |
| if (value) { |
| return at::zeros({1}, {}, self.options().dtype(kLong)); |
| } else { |
| return at::empty({0}, {}, self.options().dtype(kLong)); |
| } |
| } |
| |
| static inline Tensor boolToIndexingTensor(const Tensor& self, bool value, const at::Device& self_device) { |
| if (self_device == at::kCPU || self_device == at::kCUDA) { |
| return boolToIndexingTensorCPUOrCUDA(self, value); |
| } else { |
| return boolToIndexingTensorNonNativeDeviceType(self, value); |
| } |
| } |
| |
| static inline Tensor scalarToTensorCPUOrCUDA(Scalar v, const TensorOptions& options) { |
| return at::native::scalar_tensor(v, options); |
| } |
| |
| static inline Tensor scalarToTensorNonNativeDeviceType(Scalar v, const TensorOptions& options) { |
| return at::scalar_tensor(v, options); |
| } |
| |
| static inline void recordTensorIndex(const Tensor& tensor, std::vector<Tensor>& outIndices, int64_t* dim_ptr) { |
| // TODO: check scalarType |
| outIndices.resize(*dim_ptr + 1); |
| outIndices[*dim_ptr] = tensor; |
| (*dim_ptr)++; |
| } |
| |
| static inline std::vector<Tensor> typeConvertIndices(const Tensor& self, std::vector<Tensor>&& indices) { |
| std::vector<Tensor> converted_inds(indices.size()); |
| for (size_t i = 0; i < indices.size(); ++i) { |
| const auto &ind = indices[i]; |
| if (ind.defined()) { |
| converted_inds[i] = ind.to(ind.options().device(self.device())); |
| } else { |
| converted_inds[i] = std::move(indices[i]); |
| } |
| } |
| return converted_inds; |
| } |
| |
| // NOTE: Why do we mirror instead of replace the `count_specified_dimensions` function |
| // in torch/csrc/autograd/python_variable_indexing.cpp? It's because |
| // `count_specified_dimensions` is on the hot path of Python tensor multi-dim indexing |
| // (i.e. it's called by `applySlicing` which is called by `THPVariable_getitem` / |
| // `THPVariable_setitem` when handling indexing of more than one dimension). If we were |
| // to merge the Python/C++ `count_specified_dimensions` functions, on the Python side |
| // we would have to construct a `std::vector` container to be consumed by the C++ |
| // `count_specified_dimensions` function, which adds hundreds of nanoseconds of overhead |
| // and is undesirable. |
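| // |
| // A worked example of the count (a sketch; `mask` denotes a hypothetical 2x3 kByte tensor): |
| // for the indices `{None, 0, true, Slice(), mask}` the count is |
| // 0 (None) + 1 (integer) + 0 (bool) + 1 (slice) + 2 (mask.dim()) = 4. |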
| static inline int64_t count_specified_dimensions(const ArrayRef<TensorIndex>& indices) { |
| // Count the number of indexed dimensions (everything but ellipsis, None, and boolean values) |
| int64_t count = 0; |
| for (auto& obj : indices) { |
| if (obj.is_tensor()) { |
| auto& tensor = obj.tensor(); |
| if (tensor.scalar_type() == kByte || tensor.scalar_type() == kBool) { |
| count += tensor.dim(); |
| } else { |
| count++; |
| } |
| } else if (!obj.is_none() && !obj.is_ellipsis() && !obj.is_boolean()) { |
| count++; |
| } |
| } |
| return count; |
| } |
| } // namespace impl |
| |
| // NOTE: Many functions below are meant to be consumed only by the Python indexing |
| // implementation; they include: |
| // |
| // - `Tensor scalarToTensor(...)` |
| // - `IntArrayRef slicePrefix1sSize(...)` |
| // - `void copy_to(...)` |
| // - `Tensor handleDimInMultiDimIndexing(...)` |
| // - `Tensor dispatch_index(...)` |
| // - `Tensor dispatch_index_put_(...)` |
| // - `Tensor get_item(...)` |
| // - `void set_item(...)` |
| // |
| // The rest of the functions are in the `at::indexing::impl` namespace, signifying |
| // that they shouldn't be used from the Python indexing implementation. |
| static inline Tensor scalarToTensor(Scalar v, const TensorOptions& options, const at::Device& self_device) { |
| if (self_device == at::kCPU || self_device == at::kCUDA) { |
| return impl::scalarToTensorCPUOrCUDA(v, options); |
| } else { |
| return impl::scalarToTensorNonNativeDeviceType(v, options); |
| } |
| } |
| |
| // To match numpy semantics: |
| // As a special case for backwards compatibility, |
| // strip away unit dimensions from the left of 'src' |
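| // For example (the values follow from the loop below): |
| //   sizes (1, 1, 3, 4) -> (3, 4);  an all-ones shape such as (1, 1, 1) strips to an empty size list |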
| static inline IntArrayRef slicePrefix1sSize(const IntArrayRef& sizes) { |
| size_t first_non1_src = sizes.size(); |
| for (size_t i = 0; i < sizes.size(); ++i) { |
| if (sizes[i] != 1) { |
| first_non1_src = i; |
| break; |
| } |
| } |
| |
| return sizes.slice(first_non1_src); |
| } |
| |
| static inline void copy_to(const Tensor& dst, const Tensor& src) { |
| Tensor b_src; |
| std::tie(b_src) = expand_inplace(dst, src.view(slicePrefix1sSize(src.sizes())), "setitem"); |
| dst.copy_(b_src); |
| } |
| |
| // See NOTE [ Setting `disable_slice_optimization` when calling C++ tensor indexing functions from Python ] |
| static inline Tensor handleDimInMultiDimIndexing( |
| const Tensor& prev_dim_result, |
| const Tensor& original_tensor, |
| const TensorIndex& index, |
| int64_t* dim_ptr, |
| int64_t* specified_dims_ptr, |
| int64_t real_dim, |
| std::vector<Tensor>& outIndices, |
| bool disable_slice_optimization, |
| const at::Device& original_tensor_device, |
| const IntArrayRef& prev_dim_result_sizes) { |
| if (index.is_integer()) { |
| return impl::applySelect(prev_dim_result, *dim_ptr, index.integer(), real_dim, original_tensor_device, prev_dim_result_sizes); |
| } else if (index.is_slice()) { |
| Tensor result = impl::applySlice( |
| prev_dim_result, |
| *dim_ptr, |
| index.slice().start(), |
| index.slice().stop(), |
| index.slice().step(), |
| /*disable_slice_optimization=*/disable_slice_optimization, |
| original_tensor_device, |
| prev_dim_result_sizes); |
| (*dim_ptr)++; |
| return result; |
| } else if (index.is_ellipsis()) { |
| (*dim_ptr) += original_tensor.dim() - (*specified_dims_ptr); |
| return prev_dim_result; |
| } else if (index.is_none()) { |
| Tensor result = prev_dim_result.unsqueeze(*dim_ptr); |
| (*dim_ptr)++; |
| return result; |
| } else if (index.is_boolean()) { |
| Tensor result = prev_dim_result.unsqueeze(*dim_ptr); |
| impl::recordTensorIndex(impl::boolToIndexingTensor(result, index.boolean(), original_tensor_device), outIndices, dim_ptr); |
| return result; |
| } else if (index.is_tensor()) { |
| Tensor result = prev_dim_result; |
| const Tensor& tensor = index.tensor(); |
| auto scalar_type = tensor.scalar_type(); |
| if (tensor.dim() == 0 && at::isIntegralType(scalar_type, /*includeBool=*/true)) { |
| if (scalar_type != at::kByte && scalar_type != at::kBool) { |
| result = impl::applySelect(result, *dim_ptr, tensor.item<int64_t>(), real_dim, original_tensor_device, prev_dim_result_sizes); |
| } else { |
| result = result.unsqueeze(*dim_ptr); |
| if (scalar_type == at::kBool) { |
| impl::recordTensorIndex(impl::boolToIndexingTensor(result, tensor.item<bool>() != 0, original_tensor_device), outIndices, dim_ptr); |
| } else { |
| impl::recordTensorIndex(impl::boolToIndexingTensor(result, tensor.item<uint8_t>() != 0, original_tensor_device), outIndices, dim_ptr); |
| } |
| } |
| } else { |
| impl::recordTensorIndex(tensor, outIndices, dim_ptr); |
| } |
| return result; |
| } else { |
| TORCH_INTERNAL_ASSERT(false, "Invalid TensorIndex type"); |
| } |
| } |
| |
| namespace impl { |
| // This mirrors `applySlicing` in torch/csrc/autograd/python_variable_indexing.cpp |
| static inline Tensor applySlicing( |
| const Tensor& self, |
| const ArrayRef<TensorIndex>& indices, |
| std::vector<Tensor>& outIndices, |
| bool disable_slice_optimization, |
| const at::Device& self_device, |
| const IntArrayRef& self_sizes) { |
| int64_t dim = 0; |
| int64_t specified_dims = impl::count_specified_dimensions(indices); |
| |
| TORCH_CHECK_INDEX( |
| specified_dims <= (int64_t)self_sizes.size(), |
| "too many indices for tensor of dimension ", (int)self_sizes.size()); |
| |
| Tensor result = self; |
| for (size_t i = 0; i < indices.size(); i++) { |
| auto& obj = indices[i]; |
| result = handleDimInMultiDimIndexing( |
| /*prev_dim_result=*/result, |
| /*original_tensor=*/self, |
| /*index=*/obj, |
| /*dim_ptr=*/&dim, |
| /*specified_dims_ptr=*/&specified_dims, |
| /*real_dim=*/i, |
| /*outIndices=*/outIndices, |
| /*disable_slice_optimization=*/disable_slice_optimization, |
| /*original_tensor_device=*/self_device, |
| /*prev_dim_result_sizes=*/result.sizes()); |
| } |
| return result; |
| } |
| } // namespace impl |
| |
| static inline Tensor dispatch_index(const Tensor& self, std::vector<Tensor>&& indices) { |
| return self.index(impl::typeConvertIndices(self, std::move(indices))); |
| } |
| |
| static inline Tensor dispatch_index_put_(Tensor& self, std::vector<Tensor>&& indices, const Tensor& value) { |
| return self.index_put_(impl::typeConvertIndices(self, std::move(indices)), value); |
| } |
| |
| // NOTE [ Setting `disable_slice_optimization` when calling C++ tensor indexing functions from Python ] |
| // |
| // Question: When should we set `disable_slice_optimization` to `true` when calling C++ tensor indexing |
| // functions from Python indexing code? |
| // |
| // Answer: First, let's clarify what "slice optimization" means: when we have a slicing expression like `x[0:5, 0]`, |
| // where the sliced tensor has size 5 in dimension 0, the slice is a no-op, so we skip dispatching the actual slice |
| // call as an optimization. However, here are the cases where we DON'T want this optimization: |
| // |
| // 1. When we are doing 1-D slicing (e.g. `tensor[:]`). |
| // Reason: we always return a shallow copy for expressions such as `tensor[:]` / `tensor[...]` / `tensor[:, :]`. |
| // (Note that for `tensor[:, :]`, we return an alias of `tensor` by doing the following: |
| // ``` |
| // Tensor sliced = impl::applySlicing(self, indices, tensorIndices, disable_slice_optimization, self_device, self_sizes); |
| // if (tensorIndices.empty()) { |
| // if (sliced.is_same(self)) { |
| // // ensure we return a shallow copy for things like x[...] |
| // sliced = at::alias(sliced); |
| // } |
| // return sliced; |
| // } |
| // ```) |
| // 2. When we are doing JIT tracing. |
| // Reason: JIT tracing needs the `self.slice(...)` call to properly trace the slice operation. |
| |
| // This mirrors `THPVariable_getitem` in torch/csrc/autograd/python_variable_indexing.cpp |
| // See NOTE [ Setting `disable_slice_optimization` when calling C++ tensor indexing functions from Python ] |
| static inline Tensor get_item(const Tensor& self, const ArrayRef<TensorIndex>& indices, bool disable_slice_optimization = false) { |
| at::Device self_device = self.device(); |
| IntArrayRef self_sizes = self.sizes(); |
| |
| // handle simple types: integers, slices, none, ellipsis, bool |
| if (indices.size() == 1) { |
| const TensorIndex& index = indices[0]; |
| if (index.is_integer()) { |
| return impl::applySelect(self, 0, index.integer(), 0, self_device, self_sizes); |
| } else if (index.is_slice()) { |
| return impl::applySlice( |
| self, |
| 0, |
| index.slice().start(), |
| index.slice().stop(), |
| index.slice().step(), |
| /*disable_slice_optimization=*/true, |
| self_device, |
| self_sizes); |
| } else if (index.is_none()) { |
| return self.unsqueeze(0); |
| } else if (index.is_ellipsis()) { |
| return at::alias(self); |
| } else if (index.is_boolean()) { |
| Tensor result = self.unsqueeze(0); |
| return dispatch_index( |
| result, |
| std::vector<Tensor>{impl::boolToIndexingTensor(result, index.boolean(), self_device)} |
| ); |
| } |
| } |
| |
| std::vector<Tensor> tensorIndices; |
| Tensor sliced = impl::applySlicing(self, indices, tensorIndices, disable_slice_optimization, self_device, self_sizes); |
| if (tensorIndices.empty()) { |
| if (sliced.is_same(self)) { |
| // ensure we return a shallow copy for things like x[...] |
| sliced = at::alias(sliced); |
| } |
| return sliced; |
| } |
| |
| // indexing by tensors ("advanced" indexing) |
| return dispatch_index(sliced, std::move(tensorIndices)); |
| } |
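| |
| // Usage sketch for `get_item` (a non-authoritative example; it assumes a defined 2-D tensor `x` |
| // and mirrors Python `x[1:3, None]`): |
| // ``` |
| // using namespace at::indexing; |
| // Tensor y = get_item(x, {Slice(1, 3), None}); |
| // ``` |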
| |
| // This mirrors `THPVariable_setitem` in torch/csrc/autograd/python_variable_indexing.cpp |
| // for "the assigned value is a Tensor" case |
| // See NOTE [ Setting `disable_slice_optimization` when calling C++ tensor indexing functions from Python ] |
| static inline void set_item(Tensor& self, const ArrayRef<TensorIndex>& indices, const Tensor& value, bool disable_slice_optimization = false) { |
| at::Device self_device = self.device(); |
| IntArrayRef self_sizes = self.sizes(); |
| |
| // handle simple types: integers, slices, none, ellipsis, bool |
| if (indices.size() == 1) { |
| const TensorIndex& index = indices[0]; |
| if (index.is_boolean() && !index.boolean()) { |
| // do nothing for false (technically we should check the size, but we don't have |
| // real 0-sized shapes). |
| return; |
| } else if (index.is_ellipsis()) { |
| copy_to(self, value); |
| return; |
| } else if (index.is_none() || (index.is_boolean() && index.boolean())) { |
| copy_to(self.unsqueeze(0), value); |
| return; |
| } else if (index.is_integer()) { |
| copy_to(impl::applySelect(self, 0, index.integer(), 0, self_device, self_sizes), value); |
| return; |
| } else if (index.is_slice()) { |
| copy_to(impl::applySlice( |
| self, |
| 0, |
| index.slice().start(), |
| index.slice().stop(), |
| index.slice().step(), |
| /*disable_slice_optimization=*/disable_slice_optimization, |
| self_device, |
| self_sizes), value); |
| return; |
| } |
| } |
| |
| std::vector<Tensor> tensorIndices; |
| Tensor sliced = impl::applySlicing(self, indices, tensorIndices, disable_slice_optimization, self_device, self_sizes); |
| if (tensorIndices.empty()) { |
| copy_to(sliced, value); |
| return; |
| } |
| |
| IntArrayRef valueSizes = value.sizes(); |
| IntArrayRef slicedValueSizes = slicePrefix1sSize(valueSizes); |
| Tensor valuesSliced; |
| if (!valueSizes.equals(slicedValueSizes)) { |
| valuesSliced = value.view(slicedValueSizes); |
| } else { |
| valuesSliced = value; |
| } |
| dispatch_index_put_(sliced, std::move(tensorIndices), valuesSliced); |
| return; |
| } |
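| |
| // Usage sketch for `set_item` (a non-authoritative example; it assumes defined tensors `x` and `v` |
| // of compatible shapes and mirrors Python `x[0, :] = v`): |
| // ``` |
| // using namespace at::indexing; |
| // set_item(x, {0, Slice()}, v); |
| // ``` |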
| |
| } // namespace indexing |
| } // namespace at |